<!doctype html>
<html>
<head>
<meta charset='UTF-8'><meta name='viewport' content='width=device-width initial-scale=1'>

<link href='https://fonts.loli.net/css?family=Open+Sans:400italic,700italic,700,400&subset=latin,latin-ext' rel='stylesheet' type='text/css' /><style type='text/css'>html {overflow-x: initial !important;}:root { --bg-color:#ffffff; --text-color:#333333; --select-text-bg-color:#B5D6FC; --select-text-font-color:auto; --monospace:"Lucida Console",Consolas,"Courier",monospace; --title-bar-height:20px; }
.mac-os-11 { --title-bar-height:28px; }
html { font-size: 14px; background-color: var(--bg-color); color: var(--text-color); font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; -webkit-font-smoothing: antialiased; }
body { margin: 0px; padding: 0px; height: auto; bottom: 0px; top: 0px; left: 0px; right: 0px; font-size: 1rem; line-height: 1.42857; overflow-x: hidden; background: inherit; tab-size: 4; }
iframe { margin: auto; }
a.url { word-break: break-all; }
a:active, a:hover { outline: 0px; }
.in-text-selection, ::selection { text-shadow: none; background: var(--select-text-bg-color); color: var(--select-text-font-color); }
#write { margin: 0px auto; height: auto; width: inherit; word-break: normal; overflow-wrap: break-word; position: relative; white-space: normal; overflow-x: visible; padding-top: 36px; }
#write.first-line-indent p { text-indent: 2em; }
#write.first-line-indent li p, #write.first-line-indent p * { text-indent: 0px; }
#write.first-line-indent li { margin-left: 2em; }
.for-image #write { padding-left: 8px; padding-right: 8px; }
body.typora-export { padding-left: 30px; padding-right: 30px; }
.typora-export .footnote-line, .typora-export li, .typora-export p { white-space: pre-wrap; }
.typora-export .task-list-item input { pointer-events: none; }
@media screen and (max-width: 500px) {
  body.typora-export { padding-left: 0px; padding-right: 0px; }
  #write { padding-left: 20px; padding-right: 20px; }
  .CodeMirror-sizer { margin-left: 0px !important; }
  .CodeMirror-gutters { display: none !important; }
}
#write li > figure:last-child { margin-bottom: 0.5rem; }
#write ol, #write ul { position: relative; }
img { max-width: 100%; vertical-align: middle; image-orientation: from-image; }
button, input, select, textarea { color: inherit; font: inherit; }
input[type="checkbox"], input[type="radio"] { line-height: normal; padding: 0px; }
*, ::after, ::before { box-sizing: border-box; }
#write h1, #write h2, #write h3, #write h4, #write h5, #write h6, #write p, #write pre { width: inherit; }
#write h1, #write h2, #write h3, #write h4, #write h5, #write h6, #write p { position: relative; }
p { line-height: inherit; }
h1, h2, h3, h4, h5, h6 { break-after: avoid-page; break-inside: avoid; orphans: 4; }
p { orphans: 4; }
h1 { font-size: 2rem; }
h2 { font-size: 1.8rem; }
h3 { font-size: 1.6rem; }
h4 { font-size: 1.4rem; }
h5 { font-size: 1.2rem; }
h6 { font-size: 1rem; }
.md-math-block, .md-rawblock, h1, h2, h3, h4, h5, h6, p { margin-top: 1rem; margin-bottom: 1rem; }
.hidden { display: none; }
.md-blockmeta { color: rgb(204, 204, 204); font-weight: 700; font-style: italic; }
a { cursor: pointer; }
sup.md-footnote { padding: 2px 4px; background-color: rgba(238, 238, 238, 0.7); color: rgb(85, 85, 85); border-radius: 4px; cursor: pointer; }
sup.md-footnote a, sup.md-footnote a:hover { color: inherit; text-transform: inherit; text-decoration: inherit; }
#write input[type="checkbox"] { cursor: pointer; width: inherit; height: inherit; }
figure { overflow-x: auto; margin: 1.2em 0px; max-width: calc(100% + 16px); padding: 0px; }
figure > table { margin: 0px; }
tr { break-inside: avoid; break-after: auto; }
thead { display: table-header-group; }
table { border-collapse: collapse; border-spacing: 0px; width: 100%; overflow: auto; break-inside: auto; text-align: left; }
table.md-table td { min-width: 32px; }
.CodeMirror-gutters { border-right: 0px; background-color: inherit; }
.CodeMirror-linenumber { user-select: none; }
.CodeMirror { text-align: left; }
.CodeMirror-placeholder { opacity: 0.3; }
.CodeMirror pre { padding: 0px 4px; }
.CodeMirror-lines { padding: 0px; }
div.hr:focus { cursor: none; }
#write pre { white-space: pre-wrap; }
#write.fences-no-line-wrapping pre { white-space: pre; }
#write pre.ty-contain-cm { white-space: normal; }
.CodeMirror-gutters { margin-right: 4px; }
.md-fences { font-size: 0.9rem; display: block; break-inside: avoid; text-align: left; overflow: visible; white-space: pre; background: inherit; position: relative !important; }
.md-fences-adv-panel { width: 100%; margin-top: 10px; text-align: center; padding-top: 0px; padding-bottom: 8px; overflow-x: auto; }
#write .md-fences.mock-cm { white-space: pre-wrap; }
.md-fences.md-fences-with-lineno { padding-left: 0px; }
#write.fences-no-line-wrapping .md-fences.mock-cm { white-space: pre; overflow-x: auto; }
.md-fences.mock-cm.md-fences-with-lineno { padding-left: 8px; }
.CodeMirror-line, twitterwidget { break-inside: avoid; }
.footnotes { opacity: 0.8; font-size: 0.9rem; margin-top: 1em; margin-bottom: 1em; }
.footnotes + .footnotes { margin-top: 0px; }
.md-reset { margin: 0px; padding: 0px; border: 0px; outline: 0px; vertical-align: top; background: 0px 0px; text-decoration: none; text-shadow: none; float: none; position: static; width: auto; height: auto; white-space: nowrap; cursor: inherit; -webkit-tap-highlight-color: transparent; line-height: normal; font-weight: 400; text-align: left; box-sizing: content-box; direction: ltr; }
li div { padding-top: 0px; }
blockquote { margin: 1rem 0px; }
li .mathjax-block, li p { margin: 0.5rem 0px; }
li blockquote { margin: 1rem 0px; }
li { margin: 0px; position: relative; }
blockquote > :last-child { margin-bottom: 0px; }
blockquote > :first-child, li > :first-child { margin-top: 0px; }
.footnotes-area { color: rgb(136, 136, 136); margin-top: 0.714rem; padding-bottom: 0.143rem; white-space: normal; }
#write .footnote-line { white-space: pre-wrap; }
@media print {
  body, html { border: 1px solid transparent; height: 99%; break-after: avoid; break-before: avoid; font-variant-ligatures: no-common-ligatures; }
  #write { margin-top: 0px; padding-top: 0px; border-color: transparent !important; }
  .typora-export * { -webkit-print-color-adjust: exact; }
  .typora-export #write { break-after: avoid; }
  .typora-export #write::after { height: 0px; }
  .is-mac table { break-inside: avoid; }
  .typora-export-show-outline .typora-export-sidebar { display: none; }
}
.footnote-line { margin-top: 0.714em; font-size: 0.7em; }
a img, img a { cursor: pointer; }
pre.md-meta-block { font-size: 0.8rem; min-height: 0.8rem; white-space: pre-wrap; background: rgb(204, 204, 204); display: block; overflow-x: hidden; }
p > .md-image:only-child:not(.md-img-error) img, p > img:only-child { display: block; margin: auto; }
#write.first-line-indent p > .md-image:only-child:not(.md-img-error) img { left: -2em; position: relative; }
p > .md-image:only-child { display: inline-block; width: 100%; }
#write .MathJax_Display { margin: 0.8em 0px 0px; }
.md-math-block { width: 100%; }
.md-math-block:not(:empty)::after { display: none; }
.MathJax_ref { fill: currentcolor; }
[contenteditable="true"]:active, [contenteditable="true"]:focus, [contenteditable="false"]:active, [contenteditable="false"]:focus { outline: 0px; box-shadow: none; }
.md-task-list-item { position: relative; list-style-type: none; }
.task-list-item.md-task-list-item { padding-left: 0px; }
.md-task-list-item > input { position: absolute; top: 0px; left: 0px; margin-left: -1.2em; margin-top: calc(1em - 10px); border: none; }
.math { font-size: 1rem; }
.md-toc { min-height: 3.58rem; position: relative; font-size: 0.9rem; border-radius: 10px; }
.md-toc-content { position: relative; margin-left: 0px; }
.md-toc-content::after, .md-toc::after { display: none; }
.md-toc-item { display: block; color: rgb(65, 131, 196); }
.md-toc-item a { text-decoration: none; }
.md-toc-inner:hover { text-decoration: underline; }
.md-toc-inner { display: inline-block; cursor: pointer; }
.md-toc-h1 .md-toc-inner { margin-left: 0px; font-weight: 700; }
.md-toc-h2 .md-toc-inner { margin-left: 2em; }
.md-toc-h3 .md-toc-inner { margin-left: 4em; }
.md-toc-h4 .md-toc-inner { margin-left: 6em; }
.md-toc-h5 .md-toc-inner { margin-left: 8em; }
.md-toc-h6 .md-toc-inner { margin-left: 10em; }
@media screen and (max-width: 48em) {
  .md-toc-h3 .md-toc-inner { margin-left: 3.5em; }
  .md-toc-h4 .md-toc-inner { margin-left: 5em; }
  .md-toc-h5 .md-toc-inner { margin-left: 6.5em; }
  .md-toc-h6 .md-toc-inner { margin-left: 8em; }
}
a.md-toc-inner { font-size: inherit; font-style: inherit; font-weight: inherit; line-height: inherit; }
.footnote-line a:not(.reversefootnote) { color: inherit; }
.md-attr { display: none; }
.md-fn-count::after { content: "."; }
code, pre, samp, tt { font-family: var(--monospace); }
kbd { margin: 0px 0.1em; padding: 0.1em 0.6em; font-size: 0.8em; color: rgb(36, 39, 41); background: rgb(255, 255, 255); border: 1px solid rgb(173, 179, 185); border-radius: 3px; box-shadow: rgba(12, 13, 14, 0.2) 0px 1px 0px, rgb(255, 255, 255) 0px 0px 0px 2px inset; white-space: nowrap; vertical-align: middle; }
.md-comment { color: rgb(162, 127, 3); opacity: 0.8; font-family: var(--monospace); }
code { text-align: left; vertical-align: initial; }
a.md-print-anchor { white-space: pre !important; border-width: initial !important; border-style: none !important; border-color: initial !important; display: inline-block !important; position: absolute !important; width: 1px !important; right: 0px !important; outline: 0px !important; background: 0px 0px !important; text-decoration: initial !important; text-shadow: initial !important; }
.md-inline-math .MathJax_SVG .noError { display: none !important; }
.html-for-mac .inline-math-svg .MathJax_SVG { vertical-align: 0.2px; }
.md-fences-math .MathJax_SVG_Display, .md-math-block .MathJax_SVG_Display { text-align: center; margin: 0px; position: relative; text-indent: 0px; max-width: none; max-height: none; min-height: 0px; min-width: 100%; width: auto; overflow-y: visible; display: block !important; }
.MathJax_SVG_Display, .md-inline-math .MathJax_SVG_Display { width: auto; margin: inherit; display: inline-block !important; }
.MathJax_SVG .MJX-monospace { font-family: var(--monospace); }
.MathJax_SVG .MJX-sans-serif { font-family: sans-serif; }
.MathJax_SVG { display: inline; font-style: normal; font-weight: 400; line-height: normal; text-indent: 0px; text-align: left; text-transform: none; letter-spacing: normal; word-spacing: normal; overflow-wrap: normal; white-space: nowrap; float: none; direction: ltr; max-width: none; max-height: none; min-width: 0px; min-height: 0px; border: 0px; padding: 0px; margin: 0px; zoom: 90%; }
#math-inline-preview-content { zoom: 1.1; }
.MathJax_SVG * { transition: none 0s ease 0s; }
.MathJax_SVG_Display svg { vertical-align: middle !important; margin-bottom: 0px !important; margin-top: 0px !important; }
.os-windows.monocolor-emoji .md-emoji { font-family: "Segoe UI Symbol", sans-serif; }
.md-diagram-panel > svg { max-width: 100%; }
[lang="flow"] svg, [lang="mermaid"] svg { max-width: 100%; height: auto; }
[lang="mermaid"] .node text { font-size: 1rem; }
table tr th { border-bottom: 0px; }
video { max-width: 100%; display: block; margin: 0px auto; }
iframe { max-width: 100%; width: 100%; border: none; }
.highlight td, .highlight tr { border: 0px; }
mark { background: rgb(255, 255, 0); color: rgb(0, 0, 0); }
.md-html-inline .md-plain, .md-html-inline strong, mark .md-inline-math, mark strong { color: inherit; }
.md-expand mark .md-meta { opacity: 0.3 !important; }
mark .md-meta { color: rgb(0, 0, 0); }
@media print {
  .typora-export h1, .typora-export h2, .typora-export h3, .typora-export h4, .typora-export h5, .typora-export h6 { break-inside: avoid; }
}
.md-diagram-panel .messageText { stroke: none !important; }
.md-diagram-panel .start-state { fill: var(--node-fill); }
.md-diagram-panel .edgeLabel rect { opacity: 1 !important; }
.md-require-zoom-fix foreignobject { font-size: var(--mermaid-font-zoom); }
.md-fences.md-fences-math { font-size: 1em; }
.md-fences-math .MathJax_SVG_Display { margin-top: 8px; cursor: default; }
.md-fences-advanced:not(.md-focus) { padding: 0px; white-space: nowrap; border: 0px; }
.md-fences-advanced:not(.md-focus) { background: inherit; }
.typora-export-show-outline .typora-export-content { max-width: 1440px; margin: auto; display: flex; flex-direction: row; }
.typora-export-sidebar { width: 300px; font-size: 0.8rem; margin-top: 80px; margin-right: 18px; }
.typora-export-show-outline #write { --webkit-flex:2; flex: 2 1 0%; }
.typora-export-sidebar .outline-content { position: fixed; top: 0px; max-height: 100%; overflow: hidden auto; padding-bottom: 30px; padding-top: 60px; width: 300px; }
@media screen and (max-width: 1024px) {
  .typora-export-sidebar, .typora-export-sidebar .outline-content { width: 240px; }
}
@media screen and (max-width: 800px) {
  .typora-export-sidebar { display: none; }
}
.outline-content li, .outline-content ul { margin-left: 0px; margin-right: 0px; padding-left: 0px; padding-right: 0px; list-style: none; }
.outline-content ul { margin-top: 0px; margin-bottom: 0px; }
.outline-content strong { font-weight: 400; }
.outline-expander { width: 1rem; height: 1.42857rem; position: relative; display: table-cell; vertical-align: middle; cursor: pointer; padding-left: 4px; }
.outline-expander::before { content: ""; position: relative; font-family: Ionicons; display: inline-block; font-size: 8px; vertical-align: middle; }
.outline-item { padding-top: 3px; padding-bottom: 3px; cursor: pointer; }
.outline-expander:hover::before { content: ""; }
.outline-h1 > .outline-item { padding-left: 0px; }
.outline-h2 > .outline-item { padding-left: 1em; }
.outline-h3 > .outline-item { padding-left: 2em; }
.outline-h4 > .outline-item { padding-left: 3em; }
.outline-h5 > .outline-item { padding-left: 4em; }
.outline-h6 > .outline-item { padding-left: 5em; }
.outline-label { cursor: pointer; display: table-cell; vertical-align: middle; text-decoration: none; color: inherit; }
.outline-label:hover { text-decoration: underline; }
.outline-item:hover { border-color: rgb(245, 245, 245); background-color: var(--item-hover-bg-color); }
.outline-item:hover { margin-left: -28px; margin-right: -28px; border-left: 28px solid transparent; border-right: 28px solid transparent; }
.outline-item-single .outline-expander::before, .outline-item-single .outline-expander:hover::before { display: none; }
.outline-item-open > .outline-item > .outline-expander::before { content: ""; }
.outline-children { display: none; }
.info-panel-tab-wrapper { display: none; }
.outline-item-open > .outline-children { display: block; }
.typora-export .outline-item { padding-top: 1px; padding-bottom: 1px; }
.typora-export .outline-item:hover { margin-right: -8px; border-right: 8px solid transparent; }
.typora-export .outline-expander::before { content: "+"; font-family: inherit; top: -1px; }
.typora-export .outline-expander:hover::before, .typora-export .outline-item-open > .outline-item > .outline-expander::before { content: "−"; }
.typora-export-collapse-outline .outline-children { display: none; }
.typora-export-collapse-outline .outline-item-open > .outline-children, .typora-export-no-collapse-outline .outline-children { display: block; }
.typora-export-no-collapse-outline .outline-expander::before { content: "" !important; }
.typora-export-show-outline .outline-item-active > .outline-item .outline-label { font-weight: 700; }


:root {
    --side-bar-bg-color: #fafafa;
    --control-text-color: #777;
}

@include-when-export url(https://fonts.loli.net/css?family=Open+Sans:400italic,700italic,700,400&subset=latin,latin-ext);

/* open-sans-regular - latin-ext_latin */
  /* open-sans-italic - latin-ext_latin */
    /* open-sans-700 - latin-ext_latin */
    /* open-sans-700italic - latin-ext_latin */
  html {
    font-size: 16px;
}

body {
    font-family: "Open Sans","Clear Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
    color: rgb(51, 51, 51);
    line-height: 1.6;
}

#write {
    max-width: 860px;
  	margin: 0 auto;
  	padding: 30px;
    padding-bottom: 100px;
}

@media only screen and (min-width: 1400px) {
	#write {
		max-width: 1024px;
	}
}

@media only screen and (min-width: 1800px) {
	#write {
		max-width: 1200px;
	}
}

#write > ul:first-child,
#write > ol:first-child{
    margin-top: 30px;
}

a {
    color: #4183C4;
}
h1,
h2,
h3,
h4,
h5,
h6 {
    position: relative;
    margin-top: 1rem;
    margin-bottom: 1rem;
    font-weight: bold;
    line-height: 1.4;
    cursor: text;
}
h1:hover a.anchor,
h2:hover a.anchor,
h3:hover a.anchor,
h4:hover a.anchor,
h5:hover a.anchor,
h6:hover a.anchor {
    text-decoration: none;
}
h1 tt,
h1 code {
    font-size: inherit;
}
h2 tt,
h2 code {
    font-size: inherit;
}
h3 tt,
h3 code {
    font-size: inherit;
}
h4 tt,
h4 code {
    font-size: inherit;
}
h5 tt,
h5 code {
    font-size: inherit;
}
h6 tt,
h6 code {
    font-size: inherit;
}
h1 {
    font-size: 2.25em;
    line-height: 1.2;
    border-bottom: 1px solid #eee;
}
h2 {
    font-size: 1.75em;
    line-height: 1.225;
    border-bottom: 1px solid #eee;
}

/*@media print {
    .typora-export h1,
    .typora-export h2 {
        border-bottom: none;
        padding-bottom: initial;
    }

    .typora-export h1::after,
    .typora-export h2::after {
        content: "";
        display: block;
        height: 100px;
        margin-top: -96px;
        border-top: 1px solid #eee;
    }
}*/

h3 {
    font-size: 1.5em;
    line-height: 1.43;
}
h4 {
    font-size: 1.25em;
}
h5 {
    font-size: 1em;
}
h6 {
   font-size: 1em;
    color: #777;
}
p,
blockquote,
ul,
ol,
dl,
table{
    margin: 0.8em 0;
}
li>ol,
li>ul {
    margin: 0 0;
}
hr {
    height: 2px;
    padding: 0;
    margin: 16px 0;
    background-color: #e7e7e7;
    border: 0 none;
    overflow: hidden;
    box-sizing: content-box;
}

li p.first {
    display: inline-block;
}
ul,
ol {
    padding-left: 30px;
}
ul:first-child,
ol:first-child {
    margin-top: 0;
}
ul:last-child,
ol:last-child {
    margin-bottom: 0;
}
blockquote {
    border-left: 4px solid #dfe2e5;
    padding: 0 15px;
    color: #777777;
}
blockquote blockquote {
    padding-right: 0;
}
table {
    padding: 0;
    word-break: initial;
}
table tr {
    border: 1px solid #dfe2e5;
    margin: 0;
    padding: 0;
}
table tr:nth-child(2n),
thead {
    background-color: #f8f8f8;
}
table th {
    font-weight: bold;
    border: 1px solid #dfe2e5;
    border-bottom: 0;
    margin: 0;
    padding: 6px 13px;
}
table td {
    border: 1px solid #dfe2e5;
    margin: 0;
    padding: 6px 13px;
}
table th:first-child,
table td:first-child {
    margin-top: 0;
}
table th:last-child,
table td:last-child {
    margin-bottom: 0;
}

.CodeMirror-lines {
    padding-left: 4px;
}

.code-tooltip {
    box-shadow: 0 1px 1px 0 rgba(0,28,36,.3);
    border-top: 1px solid #eef2f2;
}

.md-fences,
code,
tt {
    border: 1px solid #e7eaed;
    background-color: #f8f8f8;
    border-radius: 3px;
    padding: 0;
    padding: 2px 4px 0px 4px;
    font-size: 0.9em;
}

code {
    background-color: #f3f4f4;
    padding: 0 2px 0 2px;
}

.md-fences {
    margin-bottom: 15px;
    margin-top: 15px;
    padding-top: 8px;
    padding-bottom: 6px;
}


.md-task-list-item > input {
  margin-left: -1.3em;
}

@media print {
    html {
        font-size: 13px;
    }
    table,
    pre {
        page-break-inside: avoid;
    }
    pre {
        word-wrap: break-word;
    }
}

.md-fences {
	background-color: #f8f8f8;
}
#write pre.md-meta-block {
	padding: 1rem;
    font-size: 85%;
    line-height: 1.45;
    background-color: #f7f7f7;
    border: 0;
    border-radius: 3px;
    color: #777777;
    margin-top: 0 !important;
}

.mathjax-block>.code-tooltip {
	bottom: .375rem;
}

.md-mathjax-midline {
    background: #fafafa;
}

#write>h3.md-focus:before{
	left: -1.5625rem;
	top: .375rem;
}
#write>h4.md-focus:before{
	left: -1.5625rem;
	top: .285714286rem;
}
#write>h5.md-focus:before{
	left: -1.5625rem;
	top: .285714286rem;
}
#write>h6.md-focus:before{
	left: -1.5625rem;
	top: .285714286rem;
}
.md-image>.md-meta {
    /*border: 1px solid #ddd;*/
    border-radius: 3px;
    padding: 2px 0px 0px 4px;
    font-size: 0.9em;
    color: inherit;
}

.md-tag {
    color: #a7a7a7;
    opacity: 1;
}

.md-toc { 
    margin-top:20px;
    padding-bottom:20px;
}

.sidebar-tabs {
    border-bottom: none;
}

#typora-quick-open {
    border: 1px solid #ddd;
    background-color: #f8f8f8;
}

#typora-quick-open-item {
    background-color: #FAFAFA;
    border-color: #FEFEFE #e5e5e5 #e5e5e5 #eee;
    border-style: solid;
    border-width: 1px;
}

/** focus mode */
.on-focus-mode blockquote {
    border-left-color: rgba(85, 85, 85, 0.12);
}

header, .context-menu, .megamenu-content, footer{
    font-family: "Segoe UI", "Arial", sans-serif;
}

.file-node-content:hover .file-node-icon,
.file-node-content:hover .file-node-open-state{
    visibility: visible;
}

.mac-seamless-mode #typora-sidebar {
    background-color: #fafafa;
    background-color: var(--side-bar-bg-color);
}

.md-lang {
    color: #b4654d;
}

/*.html-for-mac {
    --item-hover-bg-color: #E6F0FE;
}*/

#md-notification .btn {
    border: 0;
}

.dropdown-menu .divider {
    border-color: #e5e5e5;
    opacity: 0.4;
}

.ty-preferences .window-content {
    background-color: #fafafa;
}

.ty-preferences .nav-group-item.active {
    color: white;
    background: #999;
}

.menu-item-container a.menu-style-btn {
    background-color: #f5f8fa;
    background-image: linear-gradient( 180deg , hsla(0, 0%, 100%, 0.8), hsla(0, 0%, 100%, 0)); 
}


 :root {--mermaid-font-zoom:1.5em ;} 
</style><title>errata202101</title>
</head>
<body class='typora-export os-windows'><div class='typora-export-content'>
<div id='write'  class=''><h1 id='强化学习原理与python实现更新与勘误'><span>《强化学习：原理与Python实现》更新与勘误</span></h1><p><span>（2021年01月第1版第6次印刷）</span></p><h3 id='行数计算方法'><span>行数计算方法</span></h3><p><span>本勘误文档中，行数计算“第</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="0.801ex" height="1.994ex" viewBox="0 -755.9 345 858.4" role="img" focusable="false" style="vertical-align: -0.238ex;"><defs><path stroke-width="0" id="E27-MJMATHI-69" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E27-MJMATHI-69" x="0" y="0"></use></g></svg></span><script type="math/tex">i</script><span>行”（</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="13.203ex" height="2.344ex" viewBox="0 -755.9 5684.6 1009.2" role="img" focusable="false" style="vertical-align: -0.588ex;"><defs><path stroke-width="0" id="E28-MJMATHI-69" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path><path stroke-width="0" id="E28-MJMAIN-3D" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path stroke-width="0" id="E28-MJMAIN-30" d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"></path><path stroke-width="0" id="E28-MJMAIN-2C" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path stroke-width="0" id="E28-MJMAIN-31" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path><path stroke-width="0" id="E28-MJMAIN-32" d="M109 429Q82 429 66 447T50 491Q50 562 103 614T235 666Q326 666 387 610T449 465Q449 422 429 383T381 315T301 241Q265 210 201 149L142 93L218 92Q375 92 385 97Q392 99 409 186V189H449V186Q448 183 436 95T421 3V0H50V19V31Q50 38 56 46T86 81Q115 113 136 137Q145 147 170 174T204 211T233 244T261 278T284 308T305 340T320 369T333 401T340 431T343 464Q343 527 309 573T212 619Q179 619 154 602T119 569T109 550Q109 549 114 549Q132 549 151 535T170 489Q170 464 154 447T109 429Z"></path><path stroke-width="0" id="E28-MJMAIN-2026" d="M78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 0T96 17T78 60ZM525 60Q525 84 542 102T585 120Q609 120 627 104T646 61Q646 36 629 18T586 0T543 17T525 60ZM972 60Q972 84 989 102T1032 120Q1056 120 1074 104T1093 61Q1093 36 1076 18T1033 0T990 17T972 60Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E28-MJMATHI-69" x="0" y="0"></use><use xlink:href="#E28-MJMAIN-3D" x="622" y="0"></use><use xlink:href="#E28-MJMAIN-30" x="1678" y="0"></use><use xlink:href="#E28-MJMAIN-2C" x="2178" y="0"></use><use xlink:href="#E28-MJMAIN-31" x="2623" y="0"></use><use xlink:href="#E28-MJMAIN-2C" x="3123" y="0"></use><use xlink:href="#E28-MJMAIN-32" x="3567" y="0"></use><use xlink:href="#E28-MJMAIN-2C" x="4067" y="0"></use><use xlink:href="#E28-MJMAIN-2026" x="4512" y="0"></use></g></svg></span><script type="math/tex">i=0,1,2,\ldots</script><span>）是从0开始计数的。小节标题、公式、内联代码、注意、本章要点记入行数，章标题、图、表、代码清单及它们的题注不计入行数。空行不计入行数。</span></p><p><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="0" height="0.243ex" viewBox="0 -52.3 0 104.5" role="img" focusable="false" style="vertical-align: -0.121ex;"><defs></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"></g></svg></span><script type="math/tex">
\newcommand{\sfA}{\unicode{x1d608}}
\newcommand{\sfS}{\unicode{x1d61a}}
\newcommand{\sfa}{\unicode{x1d622}}
\newcommand{\sfs}{\unicode{x1d634}}
\newcommand{\bftheta}{\pmb{\unicode{x3B8}}}
\newcommand{\E}{\textrm{E}}
</script></p><h2 id='第17页第6行'><span>第17页第6行</span></h2><p><span>作者注：</span>
<span>“轨道”又称为“轨迹”。本书中这两个词混用。</span></p><h2 id='第30页'><span>第30页</span></h2><p><span>作者注：用线性规划求解最优状态价值的详细证明可见《Markov Decision Processes: Discrete Stochastic Dynamic Programming》（Martin Puterman）的第6.9节。证明大致如下：当</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="4.212ex" height="2.577ex" viewBox="0 -806.1 1813.7 1109.7" role="img" focusable="false" style="vertical-align: -0.705ex;"><defs><path stroke-width="0" id="E32-MJMATHI-63" d="M34 159Q34 268 120 355T306 442Q362 442 394 418T427 355Q427 326 408 306T360 285Q341 285 330 295T319 325T330 359T352 380T366 386H367Q367 388 361 392T340 400T306 404Q276 404 249 390Q228 381 206 359Q162 315 142 235T121 119Q121 73 147 50Q169 26 205 26H209Q321 26 394 111Q403 121 406 121Q410 121 419 112T429 98T420 83T391 55T346 25T282 0T202 -11Q127 -11 81 37T34 159Z"></path><path stroke-width="0" id="E32-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E32-MJSSI-73" d="M99 299Q99 318 106 341T133 393T195 441T298 461Q336 461 370 453T420 437L436 429Q436 428 421 389T405 350Q356 386 273 386H265Q248 386 237 384T211 371T191 337Q189 329 189 326Q189 320 190 315T194 306T200 299T209 293T218 289T228 285T239 283T251 281T263 278L270 276Q278 275 283 274T298 270T316 264T333 255T351 243T367 228T380 209T388 186T391 157Q391 96 341 43T193 -11Q171 -11 150 -8T114 -1T84 9T61 19T45 28T35 33Q35 36 67 116L76 109Q132 67 211 67Q258 67 279 88T301 135Q301 159 280 170T224 187T180 197Q141 212 120 239T99 299Z"></path><path stroke-width="0" id="E32-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E32-MJMATHI-63" x="0" y="0"></use><g transform="translate(599,0)"><use xlink:href="#E32-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E32-MJSSI-73" x="389" y="0"></use><use xlink:href="#E32-MJMAIN-29" x="825" y="0"></use></g></g></svg></span><script type="math/tex">c\left(\sfs\right)</script><span>为Markov决策过程的初始状态分布时，原问题和对偶问题的目标都是Markov决策过程的平均回合奖励，原问题的最优解是最优状态价值。对</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="4.212ex" height="2.577ex" viewBox="0 -806.1 1813.7 1109.7" role="img" focusable="false" style="vertical-align: -0.705ex;"><defs><path stroke-width="0" id="E32-MJMATHI-63" d="M34 159Q34 268 120 355T306 442Q362 442 394 418T427 355Q427 326 408 306T360 285Q341 285 330 295T319 325T330 359T352 380T366 386H367Q367 388 361 392T340 400T306 404Q276 404 249 390Q228 381 206 359Q162 315 142 235T121 119Q121 73 147 50Q169 26 205 26H209Q321 26 394 111Q403 121 406 121Q410 121 419 112T429 98T420 83T391 55T346 25T282 0T202 -11Q127 -11 81 37T34 159Z"></path><path stroke-width="0" id="E32-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E32-MJSSI-73" d="M99 299Q99 318 106 341T133 393T195 441T298 461Q336 461 370 453T420 437L436 429Q436 428 421 389T405 350Q356 386 273 386H265Q248 386 237 384T211 371T191 337Q189 329 189 326Q189 320 190 315T194 306T200 299T209 293T218 289T228 285T239 283T251 281T263 278L270 276Q278 275 283 274T298 270T316 264T333 255T351 243T367 228T380 209T388 186T391 157Q391 96 341 43T193 -11Q171 -11 150 -8T114 -1T84 9T61 19T45 28T35 33Q35 36 67 116L76 109Q132 67 211 67Q258 67 279 88T301 135Q301 159 280 170T224 187T180 197Q141 212 120 239T99 299Z"></path><path stroke-width="0" id="E32-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E32-MJMATHI-63" x="0" y="0"></use><g transform="translate(599,0)"><use xlink:href="#E32-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E32-MJSSI-73" x="389" y="0"></use><use xlink:href="#E32-MJMAIN-29" x="825" y="0"></use></g></g></svg></span><script type="math/tex">c\left(\sfs\right)</script><span>做灵敏度分析可知，无论</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="4.212ex" height="2.577ex" viewBox="0 -806.1 1813.7 1109.7" role="img" focusable="false" style="vertical-align: -0.705ex;"><defs><path stroke-width="0" id="E32-MJMATHI-63" d="M34 159Q34 268 120 355T306 442Q362 442 394 418T427 355Q427 326 408 306T360 285Q341 285 330 295T319 325T330 359T352 380T366 386H367Q367 388 361 392T340 400T306 404Q276 404 249 390Q228 381 206 359Q162 315 142 235T121 119Q121 73 147 50Q169 26 205 26H209Q321 26 394 111Q403 121 406 121Q410 121 419 112T429 98T420 83T391 55T346 25T282 0T202 -11Q127 -11 81 37T34 159Z"></path><path stroke-width="0" id="E32-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E32-MJSSI-73" d="M99 299Q99 318 106 341T133 393T195 441T298 461Q336 461 370 453T420 437L436 429Q436 428 421 389T405 350Q356 386 273 386H265Q248 386 237 384T211 371T191 337Q189 329 189 326Q189 320 190 315T194 306T200 299T209 293T218 289T228 285T239 283T251 281T263 278L270 276Q278 275 283 274T298 270T316 264T333 255T351 243T367 228T380 209T388 186T391 157Q391 96 341 43T193 -11Q171 -11 150 -8T114 -1T84 9T61 19T45 28T35 33Q35 36 67 116L76 109Q132 67 211 67Q258 67 279 88T301 135Q301 159 280 170T224 187T180 197Q141 212 120 239T99 299Z"></path><path stroke-width="0" id="E32-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E32-MJMATHI-63" x="0" y="0"></use><g transform="translate(599,0)"><use xlink:href="#E32-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E32-MJSSI-73" x="389" y="0"></use><use xlink:href="#E32-MJMAIN-29" x="825" y="0"></use></g></g></svg></span><script type="math/tex">c\left(\sfs\right)</script><span>取什么分布，对偶问题均有解（这利用到对偶问题的可行域就是带折扣的状态动作对组成的分布所在的空间），所以原问题的最优解都是不变的。也就是说，Markov决策过程的最优状态价值和初始状态分布无关。进一步，可以在原问题中放宽</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="12.259ex" height="2.694ex" viewBox="0 -806.1 5278.2 1160" role="img" focusable="false" style="vertical-align: -0.822ex;"><defs><path stroke-width="0" id="E33-MJSZ1-2211" d="M61 748Q64 750 489 750H913L954 640Q965 609 976 579T993 533T999 516H979L959 517Q936 579 886 621T777 682Q724 700 655 705T436 710H319Q183 710 183 709Q186 706 348 484T511 259Q517 250 513 244L490 216Q466 188 420 134T330 27L149 -187Q149 -188 362 -188Q388 -188 436 -188T506 -189Q679 -189 778 -162T936 -43Q946 -27 959 6H999L913 -249L489 -250Q65 -250 62 -248Q56 -246 56 -239Q56 -234 118 -161Q186 -81 245 -11L428 206Q428 207 242 462L57 717L56 728Q56 744 61 748Z"></path><path stroke-width="0" id="E33-MJSSI-73" d="M99 299Q99 318 106 341T133 393T195 441T298 461Q336 461 370 453T420 437L436 429Q436 428 421 389T405 350Q356 386 273 386H265Q248 386 237 384T211 371T191 337Q189 329 189 326Q189 320 190 315T194 306T200 299T209 293T218 289T228 285T239 283T251 281T263 278L270 276Q278 275 283 274T298 270T316 264T333 255T351 243T367 228T380 209T388 186T391 157Q391 96 341 43T193 -11Q171 -11 150 -8T114 -1T84 9T61 19T45 28T35 33Q35 36 67 116L76 109Q132 67 211 67Q258 67 279 88T301 135Q301 159 280 170T224 187T180 197Q141 212 120 239T99 299Z"></path><path stroke-width="0" id="E33-MJMATHI-63" d="M34 159Q34 268 120 355T306 442Q362 442 394 418T427 355Q427 326 408 306T360 285Q341 285 330 295T319 325T330 359T352 380T366 386H367Q367 388 361 392T340 400T306 404Q276 404 249 390Q228 381 206 359Q162 315 142 235T121 119Q121 73 147 50Q169 26 205 26H209Q321 26 394 111Q403 121 406 121Q410 121 419 112T429 98T420 83T391 55T346 25T282 0T202 -11Q127 -11 81 37T34 159Z"></path><path stroke-width="0" id="E33-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E33-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path stroke-width="0" id="E33-MJMAIN-3D" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path stroke-width="0" id="E33-MJMAIN-31" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E33-MJSZ1-2211" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E33-MJSSI-73" x="1493" y="-404"></use><use xlink:href="#E33-MJMATHI-63" x="1630" y="0"></use><g transform="translate(2230,0)"><use xlink:href="#E33-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E33-MJSSI-73" x="389" y="0"></use><use xlink:href="#E33-MJMAIN-29" x="825" y="0"></use></g><use xlink:href="#E33-MJMAIN-3D" x="3722" y="0"></use><use xlink:href="#E33-MJMAIN-31" x="4778" y="0"></use></g></svg></span><script type="math/tex">\sum\nolimits_{\sfs}c\left(\sfs\right)=1</script><span>这个限制，原问题的解依然不变，只是优化目标进行了放缩。</span></p><h2 id='第58页最后一行'><span>第58页最后一行</span></h2><p><span>态</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="3.788ex" height="2.11ex" viewBox="0 -554.9 1630.8 908.7" role="img" focusable="false" style="vertical-align: -0.822ex;"><defs><path stroke-width="0" id="E34-MJSSI-73" d="M99 299Q99 318 106 341T133 393T195 441T298 461Q336 461 370 453T420 437L436 429Q436 428 421 389T405 350Q356 386 273 386H265Q248 386 237 384T211 371T191 337Q189 329 189 326Q189 320 190 315T194 306T200 299T209 293T218 289T228 285T239 283T251 281T263 278L270 276Q278 275 283 274T298 270T316 264T333 255T351 243T367 228T380 209T388 186T391 157Q391 96 341 43T193 -11Q171 -11 150 -8T114 -1T84 9T61 19T45 28T35 33Q35 36 67 116L76 109Q132 67 211 67Q258 67 279 88T301 135Q301 159 280 170T224 187T180 197Q141 212 120 239T99 299Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E34-MJSSI-73" x="0" y="0"></use><g transform="translate(383,-156)"><text font-family="STIXGeneral, 'PingFang SC', serif" stroke="none" transform="scale(35.539) matrix(1 0 0 -1 0 0)">起</text><g transform="translate(579,0)"><text font-family="STIXGeneral, 'PingFang SC', serif" stroke="none" transform="scale(35.539) matrix(1 0 0 -1 0 0)">始</text></g></g></g></svg></span><script type="math/tex">\sfs_{起始}</script></p><h4 id='改为-1'><span>改为</span></h4><p><span>态</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="3.788ex" height="2.11ex" viewBox="0 -554.9 1630.8 908.7" role="img" focusable="false" style="vertical-align: -0.822ex;"><defs><path stroke-width="0" id="E35-MJSSI-73" d="M99 299Q99 318 106 341T133 393T195 441T298 461Q336 461 370 453T420 437L436 429Q436 428 421 389T405 350Q356 386 273 386H265Q248 386 237 384T211 371T191 337Q189 329 189 326Q189 320 190 315T194 306T200 299T209 293T218 289T228 285T239 283T251 281T263 278L270 276Q278 275 283 274T298 270T316 264T333 255T351 243T367 228T380 209T388 186T391 157Q391 96 341 43T193 -11Q171 -11 150 -8T114 -1T84 9T61 19T45 28T35 33Q35 36 67 116L76 109Q132 67 211 67Q258 67 279 88T301 135Q301 159 280 170T224 187T180 197Q141 212 120 239T99 299Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E35-MJSSI-73" x="0" y="0"></use><g transform="translate(383,-156)"><text font-family="STIXGeneral, 'PingFang SC', serif" stroke="none" transform="scale(35.539) matrix(1 0 0 -1 0 0)">开</text><g transform="translate(579,0)"><text font-family="STIXGeneral, 'PingFang SC', serif" stroke="none" transform="scale(35.539) matrix(1 0 0 -1 0 0)">始</text></g></g></g></svg></span><script type="math/tex">\sfs_{开始}</script></p><h2 id='第74页正文第0行'><span>第74页正文第0行</span></h2><p><code>monte_carlo_importance_resample</code></p><p><span>改为</span></p><p><code>monte_carlo_importance_sample</code></p><h2 id='第85页算法5-10输入'><span>第85页算法5-10输入</span></h2><p><span>输入：环境（无数学描述）、策略</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="1.331ex" height="1.41ex" viewBox="0 -504.6 573 607.1" role="img" focusable="false" style="vertical-align: -0.238ex;"><defs><path stroke-width="0" id="E37-MJMATHI-3C0" d="M132 -11Q98 -11 98 22V33L111 61Q186 219 220 334L228 358H196Q158 358 142 355T103 336Q92 329 81 318T62 297T53 285Q51 284 38 284Q19 284 19 294Q19 300 38 329T93 391T164 429Q171 431 389 431Q549 431 553 430Q573 423 573 402Q573 371 541 360Q535 358 472 358H408L405 341Q393 269 393 222Q393 170 402 129T421 65T431 37Q431 20 417 5T381 -10Q370 -10 363 -7T347 17T331 77Q330 86 330 121Q330 170 339 226T357 318T367 358H269L268 354Q268 351 249 275T206 114T175 17Q164 -11 132 -11Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E37-MJMATHI-3C0" x="0" y="0"></use></g></svg></span><script type="math/tex">\pi</script><span>。</span></p><h4 id='改为-2'><span>改为</span></h4><p><span>输入：环境（无数学描述）、如果是策略评估还需要输入策略</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="1.331ex" height="1.41ex" viewBox="0 -504.6 573 607.1" role="img" focusable="false" style="vertical-align: -0.238ex;"><defs><path stroke-width="0" id="E37-MJMATHI-3C0" d="M132 -11Q98 -11 98 22V33L111 61Q186 219 220 334L228 358H196Q158 358 142 355T103 336Q92 329 81 318T62 297T53 285Q51 284 38 284Q19 284 19 294Q19 300 38 329T93 391T164 429Q171 431 389 431Q549 431 553 430Q573 423 573 402Q573 371 541 360Q535 358 472 358H408L405 341Q393 269 393 222Q393 170 402 129T421 65T431 37Q431 20 417 5T381 -10Q370 -10 363 -7T347 17T331 77Q330 86 330 121Q330 170 339 226T357 318T367 358H269L268 354Q268 351 249 275T206 114T175 17Q164 -11 132 -11Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E37-MJMATHI-3C0" x="0" y="0"></use></g></svg></span><script type="math/tex">\pi</script><span>。</span></p><h2 id='第85页算法5-10第231步'><span>第85页算法5-10第2.3.1步</span></h2><p><span>根据</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="10.486ex" height="2.577ex" viewBox="0 -806.1 4514.9 1109.7" role="img" focusable="false" style="vertical-align: -0.705ex;"><defs><path stroke-width="0" id="E38-MJMATHI-3C0" d="M132 -11Q98 -11 98 22V33L111 61Q186 219 220 334L228 358H196Q158 358 142 355T103 336Q92 329 81 318T62 297T53 285Q51 284 38 284Q19 284 19 294Q19 300 38 329T93 391T164 429Q171 431 389 431Q549 431 553 430Q573 423 573 402Q573 371 541 360Q535 358 472 358H408L405 341Q393 269 393 222Q393 170 402 129T421 65T431 37Q431 20 417 5T381 -10Q370 -10 363 -7T347 17T331 77Q330 86 330 121Q330 170 339 226T357 318T367 358H269L268 354Q268 351 249 275T206 114T175 17Q164 -11 132 -11Z"></path><path stroke-width="0" id="E38-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E38-MJMAIN-22C5" d="M78 250Q78 274 95 292T138 310Q162 310 180 294T199 251Q199 226 182 208T139 190T96 207T78 250Z"></path><path stroke-width="0" id="E38-MJMAIN-2223" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path><path stroke-width="0" id="E38-MJSSI-53" d="M161 478Q161 568 242 642T435 716Q527 716 599 673L609 667Q595 633 589 615L571 568Q570 568 564 575T546 592T518 611T475 628T417 635Q351 635 305 596T259 507Q259 465 290 444T372 411T432 396Q473 385 509 343T545 236Q545 140 464 59T270 -22Q155 -22 54 48L92 146Q93 146 101 138T124 117T161 92T216 72T288 63Q360 63 403 109T447 204Q447 220 444 233T435 256T421 273T404 285T385 295T366 301T347 306T331 310T315 314T292 321T265 331T235 346T207 367T183 395T168 431T161 478Z"></path><path stroke-width="0" id="E38-MJMATHI-74" d="M26 385Q19 392 19 395Q19 399 22 411T27 425Q29 430 36 430T87 431H140L159 511Q162 522 166 540T173 566T179 586T187 603T197 615T211 624T229 626Q247 625 254 615T261 596Q261 589 252 549T232 470L222 433Q222 431 272 431H323Q330 424 330 420Q330 398 317 385H210L174 240Q135 80 135 68Q135 26 162 26Q197 26 230 60T283 144Q285 150 288 151T303 153H307Q322 153 322 145Q322 142 319 133Q314 117 301 95T267 48T216 6T155 -11Q125 -11 98 4T59 56Q57 64 57 83V101L92 241Q127 382 128 383Q128 385 77 385H26Z"></path><path stroke-width="0" id="E38-MJMAIN-2B" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path><path stroke-width="0" id="E38-MJMATHI-6E" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path><path stroke-width="0" id="E38-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E38-MJMATHI-3C0" x="0" y="0"></use><g transform="translate(739,0)"><use xlink:href="#E38-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E38-MJMAIN-22C5" x="389" y="0"></use><use xlink:href="#E38-MJMAIN-2223" x="944" y="0"></use><g transform="translate(1500,0)"><use xlink:href="#E38-MJSSI-53" x="0" y="0"></use><g transform="translate(556,-150)"><use transform="scale(0.707)" xlink:href="#E38-MJMATHI-74" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E38-MJMAIN-2B" x="361" y="0"></use><use transform="scale(0.707)" xlink:href="#E38-MJMATHI-6E" x="1139" y="0"></use></g></g><use xlink:href="#E38-MJMAIN-29" x="3386" y="0"></use></g></g></svg></span><script type="math/tex">\pi\left(\cdot\mid\sfS_{t+n}\right)</script><span>决定</span></p><h4 id='改为-3'><span>改为</span></h4><p><span>根据</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="10.152ex" height="2.577ex" viewBox="0 -806.1 4370.9 1109.7" role="img" focusable="false" style="vertical-align: -0.705ex;"><defs><path stroke-width="0" id="E39-MJMATHI-62" d="M73 647Q73 657 77 670T89 683Q90 683 161 688T234 694Q246 694 246 685T212 542Q204 508 195 472T180 418L176 399Q176 396 182 402Q231 442 283 442Q345 442 383 396T422 280Q422 169 343 79T173 -11Q123 -11 82 27T40 150V159Q40 180 48 217T97 414Q147 611 147 623T109 637Q104 637 101 637H96Q86 637 83 637T76 640T73 647ZM336 325V331Q336 405 275 405Q258 405 240 397T207 376T181 352T163 330L157 322L136 236Q114 150 114 114Q114 66 138 42Q154 26 178 26Q211 26 245 58Q270 81 285 114T318 219Q336 291 336 325Z"></path><path stroke-width="0" id="E39-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E39-MJMAIN-22C5" d="M78 250Q78 274 95 292T138 310Q162 310 180 294T199 251Q199 226 182 208T139 190T96 207T78 250Z"></path><path stroke-width="0" id="E39-MJMAIN-2223" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path><path stroke-width="0" id="E39-MJSSI-53" d="M161 478Q161 568 242 642T435 716Q527 716 599 673L609 667Q595 633 589 615L571 568Q570 568 564 575T546 592T518 611T475 628T417 635Q351 635 305 596T259 507Q259 465 290 444T372 411T432 396Q473 385 509 343T545 236Q545 140 464 59T270 -22Q155 -22 54 48L92 146Q93 146 101 138T124 117T161 92T216 72T288 63Q360 63 403 109T447 204Q447 220 444 233T435 256T421 273T404 285T385 295T366 301T347 306T331 310T315 314T292 321T265 331T235 346T207 367T183 395T168 431T161 478Z"></path><path stroke-width="0" id="E39-MJMATHI-74" d="M26 385Q19 392 19 395Q19 399 22 411T27 425Q29 430 36 430T87 431H140L159 511Q162 522 166 540T173 566T179 586T187 603T197 615T211 624T229 626Q247 625 254 615T261 596Q261 589 252 549T232 470L222 433Q222 431 272 431H323Q330 424 330 420Q330 398 317 385H210L174 240Q135 80 135 68Q135 26 162 26Q197 26 230 60T283 144Q285 150 288 151T303 153H307Q322 153 322 145Q322 142 319 133Q314 117 301 95T267 48T216 6T155 -11Q125 -11 98 4T59 56Q57 64 57 83V101L92 241Q127 382 128 383Q128 385 77 385H26Z"></path><path stroke-width="0" id="E39-MJMAIN-2B" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path><path stroke-width="0" id="E39-MJMATHI-6E" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path><path stroke-width="0" id="E39-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E39-MJMATHI-62" x="0" y="0"></use><g transform="translate(595,0)"><use xlink:href="#E39-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E39-MJMAIN-22C5" x="389" y="0"></use><use xlink:href="#E39-MJMAIN-2223" x="944" y="0"></use><g transform="translate(1500,0)"><use xlink:href="#E39-MJSSI-53" x="0" y="0"></use><g transform="translate(556,-150)"><use transform="scale(0.707)" xlink:href="#E39-MJMATHI-74" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E39-MJMAIN-2B" x="361" y="0"></use><use transform="scale(0.707)" xlink:href="#E39-MJMATHI-6E" x="1139" y="0"></use></g></g><use xlink:href="#E39-MJMAIN-29" x="3386" y="0"></use></g></g></svg></span><script type="math/tex">b\left(\cdot\mid\sfS_{t+n}\right)</script><span>决定</span></p><h2 id='第91页算法5-13参数部分第92页算法5-14参数部分'><span>第91页算法5-13参数部分、第92页算法5-14参数部分</span></h2><p><span>参数：资格迹参数</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="1.331ex" height="2.461ex" viewBox="0 -806.1 573 1059.4" role="img" focusable="false" style="vertical-align: -0.588ex;"><defs><path stroke-width="0" id="E42-MJMATHI-3B2" d="M29 -194Q23 -188 23 -186Q23 -183 102 134T186 465Q208 533 243 584T309 658Q365 705 429 705H431Q493 705 533 667T573 570Q573 465 469 396L482 383Q533 332 533 252Q533 139 448 65T257 -10Q227 -10 203 -2T165 17T143 40T131 59T126 65L62 -188Q60 -194 42 -194H29ZM353 431Q392 431 427 419L432 422Q436 426 439 429T449 439T461 453T472 471T484 495T493 524T501 560Q503 569 503 593Q503 611 502 616Q487 667 426 667Q384 667 347 643T286 582T247 514T224 455Q219 439 186 308T152 168Q151 163 151 147Q151 99 173 68Q204 26 260 26Q302 26 349 51T425 137Q441 171 449 214T457 279Q457 337 422 372Q380 358 347 358H337Q258 358 258 389Q258 396 261 403Q275 431 353 431Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E42-MJMATHI-3B2" x="0" y="0"></use></g></svg></span><script type="math/tex">\beta</script><span>、</span></p><h4 id='改为-4'><span>改为</span></h4><p><span>参数：资格迹参数</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="1.354ex" height="1.994ex" viewBox="0 -755.9 583 858.4" role="img" focusable="false" style="vertical-align: -0.238ex;"><defs><path stroke-width="0" id="E41-MJMATHI-3BB" d="M166 673Q166 685 183 694H202Q292 691 316 644Q322 629 373 486T474 207T524 67Q531 47 537 34T546 15T551 6T555 2T556 -2T550 -11H482Q457 3 450 18T399 152L354 277L340 262Q327 246 293 207T236 141Q211 112 174 69Q123 9 111 -1T83 -12Q47 -12 47 20Q47 37 61 52T199 187Q229 216 266 252T321 306L338 322Q338 323 288 462T234 612Q214 657 183 657Q166 657 166 673Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E41-MJMATHI-3BB" x="0" y="0"></use></g></svg></span><script type="math/tex">\lambda</script><span>和</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="1.331ex" height="2.461ex" viewBox="0 -806.1 573 1059.4" role="img" focusable="false" style="vertical-align: -0.588ex;"><defs><path stroke-width="0" id="E42-MJMATHI-3B2" d="M29 -194Q23 -188 23 -186Q23 -183 102 134T186 465Q208 533 243 584T309 658Q365 705 429 705H431Q493 705 533 667T573 570Q573 465 469 396L482 383Q533 332 533 252Q533 139 448 65T257 -10Q227 -10 203 -2T165 17T143 40T131 59T126 65L62 -188Q60 -194 42 -194H29ZM353 431Q392 431 427 419L432 422Q436 426 439 429T449 439T461 453T472 471T484 495T493 524T501 560Q503 569 503 593Q503 611 502 616Q487 667 426 667Q384 667 347 643T286 582T247 514T224 455Q219 439 186 308T152 168Q151 163 151 147Q151 99 173 68Q204 26 260 26Q302 26 349 51T425 137Q441 171 449 214T457 279Q457 337 422 372Q380 358 347 358H337Q258 358 258 389Q258 396 261 403Q275 431 353 431Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E42-MJMATHI-3B2" x="0" y="0"></use></g></svg></span><script type="math/tex">\beta</script><span>、</span></p><h2 id='第91页算法5-13第236步'><span>第91页算法5-13第2.3.6步</span></h2><p><span>退出2.2步</span></p><h4 id='改为-5'><span>改为</span></h4><p><span>退出2.3步</span></p><h2 id='第105页算法6-5第21步'><span>第105页算法6-5第2.1步</span></h2><h4 id='增加'><span>增加</span></h4><p><span>初始化资格迹</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="6.135ex" height="1.994ex" viewBox="0 -755.9 2641.6 858.4" role="img" focusable="false" style="vertical-align: -0.238ex;"><defs><path stroke-width="0" id="E43-MJMAINB-7A" d="M48 262Q48 264 54 349T60 436V444H252Q289 444 336 444T394 445Q441 445 450 441T459 418Q459 406 458 404Q456 399 327 229T194 55H237Q260 56 268 56T297 58T325 65T348 77T370 98T384 128T395 170Q400 197 400 216Q400 217 431 217H462V211Q461 208 453 108T444 6V0H245Q46 0 43 2Q32 7 32 28V33Q32 41 40 52T84 112Q129 170 164 217L298 393H256Q189 392 165 380Q124 360 115 303Q110 280 110 256Q110 254 79 254H48V262Z"></path><path stroke-width="0" id="E43-MJMAIN-2190" d="M944 261T944 250T929 230H165Q167 228 182 216T211 189T244 152T277 96T303 25Q308 7 308 0Q308 -11 288 -11Q281 -11 278 -11T272 -7T267 2T263 21Q245 94 195 151T73 236Q58 242 55 247Q55 254 59 257T73 264Q121 283 158 314T215 375T247 434T264 480L267 497Q269 503 270 505T275 509T288 511Q308 511 308 500Q308 493 303 475Q293 438 278 406T246 352T215 315T185 287T165 270H929Q944 261 944 250Z"></path><path stroke-width="0" id="E43-MJMAINB-30" d="M266 654H280H282Q500 654 524 418Q529 370 529 320Q529 125 456 52Q397 -10 287 -10Q110 -10 63 154Q45 212 45 316Q45 504 113 585Q140 618 185 636T266 654ZM374 548Q347 604 286 604Q247 604 218 575Q197 552 193 511T188 311Q188 159 196 116Q202 87 225 64T287 41Q339 41 367 87Q379 107 382 152T386 329Q386 518 374 548Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E43-MJMAINB-7A" x="0" y="0"></use><use xlink:href="#E43-MJMAIN-2190" x="788" y="0"></use><use xlink:href="#E43-MJMAINB-30" x="2066" y="0"></use></g></svg></span><script type="math/tex">\mathbf{z}\leftarrow\mathbf{0}</script></p><h2 id='第114页644节'><span>第114页6.4.4节</span></h2><p><span>本节将“duel network”译作“对偶网络”有误。可译为“决斗网络”。相应的“对偶Q网络”、“对偶深度Q网络”可改为“决斗Q网络”、“决斗深度Q网络”。</span></p><h2 id='第118页'><span>第118页</span></h2><p><span>作者注：</span></p><p><span>砖瓦编码是一种历史悠久的特征构造方法，可用于回归、分类等问题。目前学术界倾向于用神经网络代替砖瓦编码来构造特征。由于砖瓦编码和强化学习没有直接关联，本书没有用过多的篇幅介绍砖瓦编码。</span></p><p><span>实际使用砖瓦编码时，不需要精确计算砖瓦的数量，常随意的大致估计砖瓦的数量作为特征数。如果设置的特征数大于真实的砖瓦数量，那么有些特征永远不会取到，有些浪费；如果设置的特征数小于真实的砖瓦数量，那么有多个砖瓦需要共享特征，具体逻辑可以见代码清单6-3中“冲突处理”部分。这些浪费和冲突往往不会造成明显的性能损失。</span></p><p><span>第118页砖瓦数计算：每个大网格的网格宽度刚好是整个取值范围的1/8。所以，第0层大网格每个维度有8个大网格；第1~7层由于有偏移，每个维度需要有9个大网格才能覆盖整个取值范围。第117页图6-3b的情况略有不同：这个图中每个维度的取值范围不是大网格的长度的整数倍。所以有些层偏移后，不需要更多的大网格也可以覆盖整个取值范围。</span></p><h2 id='第127页第10行'><span>第127页第10行</span></h2><p><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="87.602ex" height="5.963ex" viewBox="0 -1509.8 37717.3 2567.2" role="img" focusable="false" style="vertical-align: -2.456ex;"><defs><path stroke-width="0" id="E44-MJMAIN-3D" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path stroke-width="0" id="E44-MJMAIN-45" d="M128 619Q121 626 117 628T101 631T58 634H25V680H597V676Q599 670 611 560T625 444V440H585V444Q584 447 582 465Q578 500 570 526T553 571T528 601T498 619T457 629T411 633T353 634Q266 634 251 633T233 622Q233 622 233 621Q232 619 232 497V376H286Q359 378 377 385Q413 401 416 469Q416 471 416 473V493H456V213H416V233Q415 268 408 288T383 317T349 328T297 330Q290 330 286 330H232V196V114Q232 57 237 52Q243 47 289 47H340H391Q428 47 452 50T505 62T552 92T584 146Q594 172 599 200T607 247T612 270V273H652V270Q651 267 632 137T610 3V0H25V46H58Q100 47 109 49T128 61V619Z"></path><path stroke-width="0" id="E44-MJMAIN-5B" d="M118 -250V750H255V710H158V-210H255V-250H118Z"></path><path stroke-width="0" id="E44-MJSZ1-2211" d="M61 748Q64 750 489 750H913L954 640Q965 609 976 579T993 533T999 516H979L959 517Q936 579 886 621T777 682Q724 700 655 705T436 710H319Q183 710 183 709Q186 706 348 484T511 259Q517 250 513 244L490 216Q466 188 420 134T330 27L149 -187Q149 -188 362 -188Q388 -188 436 -188T506 -189Q679 -189 778 -162T936 -43Q946 -27 959 6H999L913 -249L489 -250Q65 -250 62 -248Q56 -246 56 -239Q56 -234 118 -161Q186 -81 245 -11L428 206Q428 207 242 462L57 717L56 728Q56 744 61 748Z"></path><path stroke-width="0" id="E44-MJSSI-61" d="M313 386Q286 386 260 381T217 369T186 355T164 342T155 337Q154 338 159 377T165 418Q251 461 320 461Q322 461 328 461T337 460Q397 460 435 424T473 329Q473 325 473 318T472 308Q432 110 407 2V0H317V2L325 38Q295 21 269 10Q215 -10 156 -10H149Q76 -10 62 69Q61 75 61 90Q61 127 73 150T116 194Q146 215 207 231T348 252H368L373 277Q378 302 378 318Q378 367 339 384Q332 386 313 386ZM150 116Q150 93 171 79T223 65Q259 65 293 85T341 135Q343 140 348 160T353 184Q353 186 342 186Q298 186 231 174T153 134Q150 127 150 116Z"></path><path stroke-width="0" id="E44-MJMATHI-71" d="M33 157Q33 258 109 349T280 441Q340 441 372 389Q373 390 377 395T388 406T404 418Q438 442 450 442Q454 442 457 439T460 434Q460 425 391 149Q320 -135 320 -139Q320 -147 365 -148H390Q396 -156 396 -157T393 -175Q389 -188 383 -194H370Q339 -192 262 -192Q234 -192 211 -192T174 -192T157 -193Q143 -193 143 -185Q143 -182 145 -170Q149 -154 152 -151T172 -148Q220 -148 230 -141Q238 -136 258 -53T279 32Q279 33 272 29Q224 -10 172 -10Q117 -10 75 30T33 157ZM352 326Q329 405 277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q233 26 290 98L298 109L352 326Z"></path><path stroke-width="0" id="E44-MJMATHI-3C0" d="M132 -11Q98 -11 98 22V33L111 61Q186 219 220 334L228 358H196Q158 358 142 355T103 336Q92 329 81 318T62 297T53 285Q51 284 38 284Q19 284 19 294Q19 300 38 329T93 391T164 429Q171 431 389 431Q549 431 553 430Q573 423 573 402Q573 371 541 360Q535 358 472 358H408L405 341Q393 269 393 222Q393 170 402 129T421 65T431 37Q431 20 417 5T381 -10Q370 -10 363 -7T347 17T331 77Q330 86 330 121Q330 170 339 226T357 318T367 358H269L268 354Q268 351 249 275T206 114T175 17Q164 -11 132 -11Z"></path><path stroke-width="0" id="E44-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E44-MJMATHI-3B8" d="M35 200Q35 302 74 415T180 610T319 704Q320 704 327 704T339 705Q393 701 423 656Q462 596 462 495Q462 380 417 261T302 66T168 -10H161Q125 -10 99 10T60 63T41 130T35 200ZM383 566Q383 668 330 668Q294 668 260 623T204 521T170 421T157 371Q206 370 254 370L351 371Q352 372 359 404T375 484T383 566ZM113 132Q113 26 166 26Q181 26 198 36T239 74T287 161T335 307L340 324H145Q145 321 136 286T120 208T113 132Z"></path><path stroke-width="0" id="E44-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path stroke-width="0" id="E44-MJSSI-53" d="M161 478Q161 568 242 642T435 716Q527 716 599 673L609 667Q595 633 589 615L571 568Q570 568 564 575T546 592T518 611T475 628T417 635Q351 635 305 596T259 507Q259 465 290 444T372 411T432 396Q473 385 509 343T545 236Q545 140 464 59T270 -22Q155 -22 54 48L92 146Q93 146 101 138T124 117T161 92T216 72T288 63Q360 63 403 109T447 204Q447 220 444 233T435 256T421 273T404 285T385 295T366 301T347 306T331 310T315 314T292 321T265 331T235 346T207 367T183 395T168 431T161 478Z"></path><path stroke-width="0" id="E44-MJMAIN-30" d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"></path><path stroke-width="0" id="E44-MJMAIN-2C" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path stroke-width="0" id="E44-MJMAIN-2207" d="M46 676Q46 679 51 683H781Q786 679 786 676Q786 674 617 326T444 -26Q439 -33 416 -33T388 -26Q385 -22 216 326T46 676ZM697 596Q697 597 445 597T193 596Q195 591 319 336T445 80L697 596Z"></path><path stroke-width="0" id="E44-MJMAIN-7C" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path><path stroke-width="0" id="E44-MJMAIN-3B" d="M78 370Q78 394 95 412T138 430Q162 430 180 414T199 371Q199 346 182 328T139 310T96 327T78 370ZM78 60Q78 85 94 103T137 121Q202 121 202 8Q202 -44 183 -94T144 -169T118 -194Q115 -194 106 -186T95 -174Q94 -171 107 -155T137 -107T160 -38Q161 -32 162 -22T165 -4T165 4Q165 5 161 4T142 0Q110 0 94 18T78 60Z"></path><path stroke-width="0" id="E44-MJMAIN-5D" d="M22 710V750H159V-250H22V-210H119V710H22Z"></path><path stroke-width="0" id="E44-MJSZ3-5B" d="M247 -949V1450H516V1388H309V-887H516V-949H247Z"></path><path stroke-width="0" id="E44-MJSZ3-5D" d="M11 1388V1450H280V-949H11V-887H218V1388H11Z"></path><path stroke-width="0" id="E44-MJMAIN-2B" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path><path stroke-width="0" id="E44-MJMATHI-3B3" d="M31 249Q11 249 11 258Q11 275 26 304T66 365T129 418T206 441Q233 441 239 440Q287 429 318 386T371 255Q385 195 385 170Q385 166 386 166L398 193Q418 244 443 300T486 391T508 430Q510 431 524 431H537Q543 425 543 422Q543 418 522 378T463 251T391 71Q385 55 378 6T357 -100Q341 -165 330 -190T303 -216Q286 -216 286 -188Q286 -138 340 32L346 51L347 69Q348 79 348 100Q348 257 291 317Q251 355 196 355Q148 355 108 329T51 260Q49 251 47 251Q45 249 31 249Z"></path><path stroke-width="0" id="E44-MJMAIN-31" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path><path stroke-width="0" id="E44-MJMAIN-32" d="M109 429Q82 429 66 447T50 491Q50 562 103 614T235 666Q326 666 387 610T449 465Q449 422 429 383T381 315T301 241Q265 210 201 149L142 93L218 92Q375 92 385 97Q392 99 409 186V189H449V186Q448 183 436 95T421 3V0H50V19V31Q50 38 56 46T86 81Q115 113 136 137Q145 147 170 174T204 211T233 244T261 278T284 308T305 340T320 369T333 401T340 431T343 464Q343 527 309 573T212 619Q179 619 154 602T119 569T109 550Q109 549 114 549Q132 549 151 535T170 489Q170 464 154 447T109 429Z"></path><path stroke-width="0" id="E44-MJMATHI-76" d="M173 380Q173 405 154 405Q130 405 104 376T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Q21 294 29 316T53 368T97 419T160 441Q202 441 225 417T249 361Q249 344 246 335Q246 329 231 291T200 202T182 113Q182 86 187 69Q200 26 250 26Q287 26 319 60T369 139T398 222T409 277Q409 300 401 317T383 343T365 361T357 383Q357 405 376 424T417 443Q436 443 451 425T467 367Q467 340 455 284T418 159T347 40T241 -11Q177 -11 139 22Q102 54 102 117Q102 148 110 181T151 298Q173 362 173 380Z"></path><path stroke-width="0" id="E44-MJSZ1-5B" d="M202 -349V850H394V810H242V-309H394V-349H202Z"></path><path stroke-width="0" id="E44-MJSZ1-5D" d="M22 810V850H214V-349H22V-309H174V810H22Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E44-MJMAIN-3D" x="0" y="0"></use><use xlink:href="#E44-MJMAIN-45" x="1055" y="0"></use><g transform="translate(1903,0)"><use xlink:href="#E44-MJSZ3-5B"></use><g transform="translate(528,0)"><use xlink:href="#E44-MJSZ1-2211" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJSSI-61" x="506" y="-1203"></use></g><g transform="translate(1750,0)"><use xlink:href="#E44-MJMATHI-71" x="0" y="0"></use><g transform="translate(446,-230)"><use transform="scale(0.707)" xlink:href="#E44-MJMATHI-3C0" x="0" y="0"></use><g transform="translate(405,0)"><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-28" x="0" y="0"></use><g transform="translate(275,0)"><g><use transform="scale(0.707)" xlink:href="#E44-MJMATHI-3B8" x="0" y="0"></use></g></g><use transform="scale(0.707)" xlink:href="#E44-MJMATHI-3B8" x="460" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-29" x="929" y="0"></use></g></g></g><g transform="translate(3800,0)"><use xlink:href="#E44-MJMAIN-28" x="0" y="0"></use><g transform="translate(389,0)"><use xlink:href="#E44-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-30" x="786" y="-213"></use></g><use xlink:href="#E44-MJMAIN-2C" x="1398" y="0"></use><use xlink:href="#E44-MJSSI-61" x="1843" y="0"></use><use xlink:href="#E44-MJMAIN-29" x="2324" y="0"></use></g><use xlink:href="#E44-MJMAIN-2207" x="6680" y="0"></use><use xlink:href="#E44-MJMATHI-3C0" x="7513" y="0"></use><g transform="translate(8253,0)"><use xlink:href="#E44-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E44-MJSSI-61" x="389" y="0"></use><use xlink:href="#E44-MJMAIN-7C" x="870" y="0"></use><g transform="translate(1148,0)"><use xlink:href="#E44-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-30" x="786" y="-213"></use></g><use xlink:href="#E44-MJMAIN-3B" x="2157" y="0"></use><g transform="translate(2602,0)"><g><use xlink:href="#E44-MJMATHI-3B8" x="0" y="0"></use></g></g><use xlink:href="#E44-MJMATHI-3B8" x="2652" y="0"></use><use xlink:href="#E44-MJMAIN-29" x="3121" y="0"></use></g><use xlink:href="#E44-MJSZ3-5D" x="11763" y="-1"></use></g><use xlink:href="#E44-MJMAIN-2B" x="14417" y="0"></use><use xlink:href="#E44-MJMAIN-45" x="15417" y="0"></use><g transform="translate(16265,0)"><use xlink:href="#E44-MJSZ3-5B"></use><g transform="translate(528,0)"><use xlink:href="#E44-MJSZ1-2211" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJSSI-61" x="506" y="-1203"></use></g><use xlink:href="#E44-MJMATHI-3B3" x="1750" y="0"></use><g transform="translate(2293,0)"><use xlink:href="#E44-MJMATHI-71" x="0" y="0"></use><g transform="translate(446,-230)"><use transform="scale(0.707)" xlink:href="#E44-MJMATHI-3C0" x="0" y="0"></use><g transform="translate(405,0)"><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-28" x="0" y="0"></use><g transform="translate(275,0)"><g><use transform="scale(0.707)" xlink:href="#E44-MJMATHI-3B8" x="0" y="0"></use></g></g><use transform="scale(0.707)" xlink:href="#E44-MJMATHI-3B8" x="460" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-29" x="929" y="0"></use></g></g></g><g transform="translate(4343,0)"><use xlink:href="#E44-MJMAIN-28" x="0" y="0"></use><g transform="translate(389,0)"><use xlink:href="#E44-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-31" x="786" y="-213"></use></g><use xlink:href="#E44-MJMAIN-2C" x="1398" y="0"></use><use xlink:href="#E44-MJSSI-61" x="1843" y="0"></use><use xlink:href="#E44-MJMAIN-29" x="2324" y="0"></use></g><use xlink:href="#E44-MJMAIN-2207" x="7223" y="0"></use><use xlink:href="#E44-MJMATHI-3C0" x="8056" y="0"></use><g transform="translate(8796,0)"><use xlink:href="#E44-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E44-MJSSI-61" x="389" y="0"></use><use xlink:href="#E44-MJMAIN-7C" x="870" y="0"></use><g transform="translate(1148,0)"><use xlink:href="#E44-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-31" x="786" y="-213"></use></g><use xlink:href="#E44-MJMAIN-3B" x="2157" y="0"></use><g transform="translate(2602,0)"><g><use xlink:href="#E44-MJMATHI-3B8" x="0" y="0"></use></g></g><use xlink:href="#E44-MJMATHI-3B8" x="2652" y="0"></use><use xlink:href="#E44-MJMAIN-29" x="3121" y="0"></use></g><use xlink:href="#E44-MJSZ3-5D" x="12306" y="-1"></use></g><use xlink:href="#E44-MJMAIN-2B" x="29321" y="0"></use><g transform="translate(30322,0)"><use xlink:href="#E44-MJMATHI-3B3" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-32" x="778" y="513"></use></g><use xlink:href="#E44-MJMAIN-45" x="31326" y="0"></use><g transform="translate(32173,0)"><use xlink:href="#E44-MJSZ1-5B"></use><use xlink:href="#E44-MJMAIN-2207" x="417" y="0"></use><g transform="translate(1250,0)"><use xlink:href="#E44-MJMATHI-76" x="0" y="0"></use><g transform="translate(485,-186)"><use transform="scale(0.707)" xlink:href="#E44-MJMATHI-3C0" x="0" y="0"></use><g transform="translate(405,0)"><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-28" x="0" y="0"></use><g transform="translate(275,0)"><g><use transform="scale(0.707)" xlink:href="#E44-MJMATHI-3B8" x="0" y="0"></use></g></g><use transform="scale(0.707)" xlink:href="#E44-MJMATHI-3B8" x="460" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-29" x="929" y="0"></use></g></g></g><g transform="translate(3338,0)"><use xlink:href="#E44-MJMAIN-28" x="0" y="0"></use><g transform="translate(389,0)"><use xlink:href="#E44-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E44-MJMAIN-31" x="786" y="-213"></use></g><use xlink:href="#E44-MJMAIN-29" x="1398" y="0"></use></g><use xlink:href="#E44-MJSZ1-5D" x="5126" y="-1"></use></g></g></svg></span><script type="math/tex">=\E\left[\sum\limits_\sfa{q}_{\pi\left(\bftheta\right)}\left(\sfS_0,\sfa\right)\nabla\pi\left(\sfa|\sfS_0;\bftheta\right)\right]+\E\left[\sum\limits_\sfa\gamma{q}_{\pi\left(\bftheta\right)}\left(\sfS_1,\sfa\right)\nabla\pi\left(\sfa|\sfS_1;\bftheta\right)\right]+\gamma^2\E\left[\nabla{v}_{\pi\left(\bftheta\right)}\left(\sfS_1\right)\right]</script></p><p><span>改为</span></p><p><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="87.602ex" height="5.963ex" viewBox="0 -1509.8 37717.3 2567.2" role="img" focusable="false" style="vertical-align: -2.456ex;"><defs><path stroke-width="0" id="E45-MJMAIN-3D" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path stroke-width="0" id="E45-MJMAIN-45" d="M128 619Q121 626 117 628T101 631T58 634H25V680H597V676Q599 670 611 560T625 444V440H585V444Q584 447 582 465Q578 500 570 526T553 571T528 601T498 619T457 629T411 633T353 634Q266 634 251 633T233 622Q233 622 233 621Q232 619 232 497V376H286Q359 378 377 385Q413 401 416 469Q416 471 416 473V493H456V213H416V233Q415 268 408 288T383 317T349 328T297 330Q290 330 286 330H232V196V114Q232 57 237 52Q243 47 289 47H340H391Q428 47 452 50T505 62T552 92T584 146Q594 172 599 200T607 247T612 270V273H652V270Q651 267 632 137T610 3V0H25V46H58Q100 47 109 49T128 61V619Z"></path><path stroke-width="0" id="E45-MJMAIN-5B" d="M118 -250V750H255V710H158V-210H255V-250H118Z"></path><path stroke-width="0" id="E45-MJSZ1-2211" d="M61 748Q64 750 489 750H913L954 640Q965 609 976 579T993 533T999 516H979L959 517Q936 579 886 621T777 682Q724 700 655 705T436 710H319Q183 710 183 709Q186 706 348 484T511 259Q517 250 513 244L490 216Q466 188 420 134T330 27L149 -187Q149 -188 362 -188Q388 -188 436 -188T506 -189Q679 -189 778 -162T936 -43Q946 -27 959 6H999L913 -249L489 -250Q65 -250 62 -248Q56 -246 56 -239Q56 -234 118 -161Q186 -81 245 -11L428 206Q428 207 242 462L57 717L56 728Q56 744 61 748Z"></path><path stroke-width="0" id="E45-MJSSI-61" d="M313 386Q286 386 260 381T217 369T186 355T164 342T155 337Q154 338 159 377T165 418Q251 461 320 461Q322 461 328 461T337 460Q397 460 435 424T473 329Q473 325 473 318T472 308Q432 110 407 2V0H317V2L325 38Q295 21 269 10Q215 -10 156 -10H149Q76 -10 62 69Q61 75 61 90Q61 127 73 150T116 194Q146 215 207 231T348 252H368L373 277Q378 302 378 318Q378 367 339 384Q332 386 313 386ZM150 116Q150 93 171 79T223 65Q259 65 293 85T341 135Q343 140 348 160T353 184Q353 186 342 186Q298 186 231 174T153 134Q150 127 150 116Z"></path><path stroke-width="0" id="E45-MJMATHI-71" d="M33 157Q33 258 109 349T280 441Q340 441 372 389Q373 390 377 395T388 406T404 418Q438 442 450 442Q454 442 457 439T460 434Q460 425 391 149Q320 -135 320 -139Q320 -147 365 -148H390Q396 -156 396 -157T393 -175Q389 -188 383 -194H370Q339 -192 262 -192Q234 -192 211 -192T174 -192T157 -193Q143 -193 143 -185Q143 -182 145 -170Q149 -154 152 -151T172 -148Q220 -148 230 -141Q238 -136 258 -53T279 32Q279 33 272 29Q224 -10 172 -10Q117 -10 75 30T33 157ZM352 326Q329 405 277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q233 26 290 98L298 109L352 326Z"></path><path stroke-width="0" id="E45-MJMATHI-3C0" d="M132 -11Q98 -11 98 22V33L111 61Q186 219 220 334L228 358H196Q158 358 142 355T103 336Q92 329 81 318T62 297T53 285Q51 284 38 284Q19 284 19 294Q19 300 38 329T93 391T164 429Q171 431 389 431Q549 431 553 430Q573 423 573 402Q573 371 541 360Q535 358 472 358H408L405 341Q393 269 393 222Q393 170 402 129T421 65T431 37Q431 20 417 5T381 -10Q370 -10 363 -7T347 17T331 77Q330 86 330 121Q330 170 339 226T357 318T367 358H269L268 354Q268 351 249 275T206 114T175 17Q164 -11 132 -11Z"></path><path stroke-width="0" id="E45-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E45-MJMATHI-3B8" d="M35 200Q35 302 74 415T180 610T319 704Q320 704 327 704T339 705Q393 701 423 656Q462 596 462 495Q462 380 417 261T302 66T168 -10H161Q125 -10 99 10T60 63T41 130T35 200ZM383 566Q383 668 330 668Q294 668 260 623T204 521T170 421T157 371Q206 370 254 370L351 371Q352 372 359 404T375 484T383 566ZM113 132Q113 26 166 26Q181 26 198 36T239 74T287 161T335 307L340 324H145Q145 321 136 286T120 208T113 132Z"></path><path stroke-width="0" id="E45-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path stroke-width="0" id="E45-MJSSI-53" d="M161 478Q161 568 242 642T435 716Q527 716 599 673L609 667Q595 633 589 615L571 568Q570 568 564 575T546 592T518 611T475 628T417 635Q351 635 305 596T259 507Q259 465 290 444T372 411T432 396Q473 385 509 343T545 236Q545 140 464 59T270 -22Q155 -22 54 48L92 146Q93 146 101 138T124 117T161 92T216 72T288 63Q360 63 403 109T447 204Q447 220 444 233T435 256T421 273T404 285T385 295T366 301T347 306T331 310T315 314T292 321T265 331T235 346T207 367T183 395T168 431T161 478Z"></path><path stroke-width="0" id="E45-MJMAIN-30" d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"></path><path stroke-width="0" id="E45-MJMAIN-2C" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path stroke-width="0" id="E45-MJMAIN-2207" d="M46 676Q46 679 51 683H781Q786 679 786 676Q786 674 617 326T444 -26Q439 -33 416 -33T388 -26Q385 -22 216 326T46 676ZM697 596Q697 597 445 597T193 596Q195 591 319 336T445 80L697 596Z"></path><path stroke-width="0" id="E45-MJMAIN-7C" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path><path stroke-width="0" id="E45-MJMAIN-3B" d="M78 370Q78 394 95 412T138 430Q162 430 180 414T199 371Q199 346 182 328T139 310T96 327T78 370ZM78 60Q78 85 94 103T137 121Q202 121 202 8Q202 -44 183 -94T144 -169T118 -194Q115 -194 106 -186T95 -174Q94 -171 107 -155T137 -107T160 -38Q161 -32 162 -22T165 -4T165 4Q165 5 161 4T142 0Q110 0 94 18T78 60Z"></path><path stroke-width="0" id="E45-MJMAIN-5D" d="M22 710V750H159V-250H22V-210H119V710H22Z"></path><path stroke-width="0" id="E45-MJSZ3-5B" d="M247 -949V1450H516V1388H309V-887H516V-949H247Z"></path><path stroke-width="0" id="E45-MJSZ3-5D" d="M11 1388V1450H280V-949H11V-887H218V1388H11Z"></path><path stroke-width="0" id="E45-MJMAIN-2B" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path><path stroke-width="0" id="E45-MJMATHI-3B3" d="M31 249Q11 249 11 258Q11 275 26 304T66 365T129 418T206 441Q233 441 239 440Q287 429 318 386T371 255Q385 195 385 170Q385 166 386 166L398 193Q418 244 443 300T486 391T508 430Q510 431 524 431H537Q543 425 543 422Q543 418 522 378T463 251T391 71Q385 55 378 6T357 -100Q341 -165 330 -190T303 -216Q286 -216 286 -188Q286 -138 340 32L346 51L347 69Q348 79 348 100Q348 257 291 317Q251 355 196 355Q148 355 108 329T51 260Q49 251 47 251Q45 249 31 249Z"></path><path stroke-width="0" id="E45-MJMAIN-31" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path><path stroke-width="0" id="E45-MJMAIN-32" d="M109 429Q82 429 66 447T50 491Q50 562 103 614T235 666Q326 666 387 610T449 465Q449 422 429 383T381 315T301 241Q265 210 201 149L142 93L218 92Q375 92 385 97Q392 99 409 186V189H449V186Q448 183 436 95T421 3V0H50V19V31Q50 38 56 46T86 81Q115 113 136 137Q145 147 170 174T204 211T233 244T261 278T284 308T305 340T320 369T333 401T340 431T343 464Q343 527 309 573T212 619Q179 619 154 602T119 569T109 550Q109 549 114 549Q132 549 151 535T170 489Q170 464 154 447T109 429Z"></path><path stroke-width="0" id="E45-MJMATHI-76" d="M173 380Q173 405 154 405Q130 405 104 376T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Q21 294 29 316T53 368T97 419T160 441Q202 441 225 417T249 361Q249 344 246 335Q246 329 231 291T200 202T182 113Q182 86 187 69Q200 26 250 26Q287 26 319 60T369 139T398 222T409 277Q409 300 401 317T383 343T365 361T357 383Q357 405 376 424T417 443Q436 443 451 425T467 367Q467 340 455 284T418 159T347 40T241 -11Q177 -11 139 22Q102 54 102 117Q102 148 110 181T151 298Q173 362 173 380Z"></path><path stroke-width="0" id="E45-MJSZ1-5B" d="M202 -349V850H394V810H242V-309H394V-349H202Z"></path><path stroke-width="0" id="E45-MJSZ1-5D" d="M22 810V850H214V-349H22V-309H174V810H22Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E45-MJMAIN-3D" x="0" y="0"></use><use xlink:href="#E45-MJMAIN-45" x="1055" y="0"></use><g transform="translate(1903,0)"><use xlink:href="#E45-MJSZ3-5B"></use><g transform="translate(528,0)"><use xlink:href="#E45-MJSZ1-2211" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJSSI-61" x="506" y="-1203"></use></g><g transform="translate(1750,0)"><use xlink:href="#E45-MJMATHI-71" x="0" y="0"></use><g transform="translate(446,-230)"><use transform="scale(0.707)" xlink:href="#E45-MJMATHI-3C0" x="0" y="0"></use><g transform="translate(405,0)"><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-28" x="0" y="0"></use><g transform="translate(275,0)"><g><use transform="scale(0.707)" xlink:href="#E45-MJMATHI-3B8" x="0" y="0"></use></g></g><use transform="scale(0.707)" xlink:href="#E45-MJMATHI-3B8" x="460" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-29" x="929" y="0"></use></g></g></g><g transform="translate(3800,0)"><use xlink:href="#E45-MJMAIN-28" x="0" y="0"></use><g transform="translate(389,0)"><use xlink:href="#E45-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-30" x="786" y="-213"></use></g><use xlink:href="#E45-MJMAIN-2C" x="1398" y="0"></use><use xlink:href="#E45-MJSSI-61" x="1843" y="0"></use><use xlink:href="#E45-MJMAIN-29" x="2324" y="0"></use></g><use xlink:href="#E45-MJMAIN-2207" x="6680" y="0"></use><use xlink:href="#E45-MJMATHI-3C0" x="7513" y="0"></use><g transform="translate(8253,0)"><use xlink:href="#E45-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E45-MJSSI-61" x="389" y="0"></use><use xlink:href="#E45-MJMAIN-7C" x="870" y="0"></use><g transform="translate(1148,0)"><use xlink:href="#E45-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-30" x="786" y="-213"></use></g><use xlink:href="#E45-MJMAIN-3B" x="2157" y="0"></use><g transform="translate(2602,0)"><g><use xlink:href="#E45-MJMATHI-3B8" x="0" y="0"></use></g></g><use xlink:href="#E45-MJMATHI-3B8" x="2652" y="0"></use><use xlink:href="#E45-MJMAIN-29" x="3121" y="0"></use></g><use xlink:href="#E45-MJSZ3-5D" x="11763" y="-1"></use></g><use xlink:href="#E45-MJMAIN-2B" x="14417" y="0"></use><use xlink:href="#E45-MJMAIN-45" x="15417" y="0"></use><g transform="translate(16265,0)"><use xlink:href="#E45-MJSZ3-5B"></use><g transform="translate(528,0)"><use xlink:href="#E45-MJSZ1-2211" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJSSI-61" x="506" y="-1203"></use></g><use xlink:href="#E45-MJMATHI-3B3" x="1750" y="0"></use><g transform="translate(2293,0)"><use xlink:href="#E45-MJMATHI-71" x="0" y="0"></use><g transform="translate(446,-230)"><use transform="scale(0.707)" xlink:href="#E45-MJMATHI-3C0" x="0" y="0"></use><g transform="translate(405,0)"><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-28" x="0" y="0"></use><g transform="translate(275,0)"><g><use transform="scale(0.707)" xlink:href="#E45-MJMATHI-3B8" x="0" y="0"></use></g></g><use transform="scale(0.707)" xlink:href="#E45-MJMATHI-3B8" x="460" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-29" x="929" y="0"></use></g></g></g><g transform="translate(4343,0)"><use xlink:href="#E45-MJMAIN-28" x="0" y="0"></use><g transform="translate(389,0)"><use xlink:href="#E45-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-31" x="786" y="-213"></use></g><use xlink:href="#E45-MJMAIN-2C" x="1398" y="0"></use><use xlink:href="#E45-MJSSI-61" x="1843" y="0"></use><use xlink:href="#E45-MJMAIN-29" x="2324" y="0"></use></g><use xlink:href="#E45-MJMAIN-2207" x="7223" y="0"></use><use xlink:href="#E45-MJMATHI-3C0" x="8056" y="0"></use><g transform="translate(8796,0)"><use xlink:href="#E45-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E45-MJSSI-61" x="389" y="0"></use><use xlink:href="#E45-MJMAIN-7C" x="870" y="0"></use><g transform="translate(1148,0)"><use xlink:href="#E45-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-31" x="786" y="-213"></use></g><use xlink:href="#E45-MJMAIN-3B" x="2157" y="0"></use><g transform="translate(2602,0)"><g><use xlink:href="#E45-MJMATHI-3B8" x="0" y="0"></use></g></g><use xlink:href="#E45-MJMATHI-3B8" x="2652" y="0"></use><use xlink:href="#E45-MJMAIN-29" x="3121" y="0"></use></g><use xlink:href="#E45-MJSZ3-5D" x="12306" y="-1"></use></g><use xlink:href="#E45-MJMAIN-2B" x="29321" y="0"></use><g transform="translate(30322,0)"><use xlink:href="#E45-MJMATHI-3B3" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-32" x="778" y="513"></use></g><use xlink:href="#E45-MJMAIN-45" x="31326" y="0"></use><g transform="translate(32173,0)"><use xlink:href="#E45-MJSZ1-5B"></use><use xlink:href="#E45-MJMAIN-2207" x="417" y="0"></use><g transform="translate(1250,0)"><use xlink:href="#E45-MJMATHI-76" x="0" y="0"></use><g transform="translate(485,-186)"><use transform="scale(0.707)" xlink:href="#E45-MJMATHI-3C0" x="0" y="0"></use><g transform="translate(405,0)"><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-28" x="0" y="0"></use><g transform="translate(275,0)"><g><use transform="scale(0.707)" xlink:href="#E45-MJMATHI-3B8" x="0" y="0"></use></g></g><use transform="scale(0.707)" xlink:href="#E45-MJMATHI-3B8" x="460" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-29" x="929" y="0"></use></g></g></g><g transform="translate(3338,0)"><use xlink:href="#E45-MJMAIN-28" x="0" y="0"></use><g transform="translate(389,0)"><use xlink:href="#E45-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E45-MJMAIN-32" x="786" y="-213"></use></g><use xlink:href="#E45-MJMAIN-29" x="1398" y="0"></use></g><use xlink:href="#E45-MJSZ1-5D" x="5126" y="-1"></use></g></g></svg></span><script type="math/tex">=\E\left[\sum\limits_\sfa{q}_{\pi\left(\bftheta\right)}\left(\sfS_0,\sfa\right)\nabla\pi\left(\sfa|\sfS_0;\bftheta\right)\right]+\E\left[\sum\limits_\sfa\gamma{q}_{\pi\left(\bftheta\right)}\left(\sfS_1,\sfa\right)\nabla\pi\left(\sfa|\sfS_1;\bftheta\right)\right]+\gamma^2\E\left[\nabla{v}_{\pi\left(\bftheta\right)}\left(\sfS_2\right)\right]</script></p><h2 id='第131页算法7-3第23步'><span>第131页算法7-3第2.3步</span></h2><p><span>2.3（初始化回报和权重）</span></p><h4 id='改为-6'><span>改为</span></h4><p><span>2.3（初始化回报）</span></p><h2 id='第142页算法83第232步'><span>第142页算法8.3第2.3.2步</span></h2><p><span>作者注：这里的更新式子遵循了论文原文而没有考虑累积折扣。推导出现折扣是正确的；更新时考虑折扣也是正确和合理的。</span></p><h2 id='第143页第5行第143页算法84第234步共2处）'><span>第143页第5行，第143页算法8.4第2.3.4步（共2处）</span></h2><p><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="16.033ex" height="2.577ex" viewBox="0 -806.1 6903.2 1109.7" role="img" focusable="false" style="vertical-align: -0.705ex;"><defs><path stroke-width="0" id="E46-MJMAIN-2207" d="M46 676Q46 679 51 683H781Q786 679 786 676Q786 674 617 326T444 -26Q439 -33 416 -33T388 -26Q385 -22 216 326T46 676ZM697 596Q697 597 445 597T193 596Q195 591 319 336T445 80L697 596Z"></path><path stroke-width="0" id="E46-MJMAIN-6C" d="M42 46H56Q95 46 103 60V68Q103 77 103 91T103 124T104 167T104 217T104 272T104 329Q104 366 104 407T104 482T104 542T103 586T103 603Q100 622 89 628T44 637H26V660Q26 683 28 683L38 684Q48 685 67 686T104 688Q121 689 141 690T171 693T182 694H185V379Q185 62 186 60Q190 52 198 49Q219 46 247 46H263V0H255L232 1Q209 2 183 2T145 3T107 3T57 1L34 0H26V46H42Z"></path><path stroke-width="0" id="E46-MJMAIN-6E" d="M41 46H55Q94 46 102 60V68Q102 77 102 91T102 122T103 161T103 203Q103 234 103 269T102 328V351Q99 370 88 376T43 385H25V408Q25 431 27 431L37 432Q47 433 65 434T102 436Q119 437 138 438T167 441T178 442H181V402Q181 364 182 364T187 369T199 384T218 402T247 421T285 437Q305 442 336 442Q450 438 463 329Q464 322 464 190V104Q464 66 466 59T477 49Q498 46 526 46H542V0H534L510 1Q487 2 460 2T422 3Q319 3 310 0H302V46H318Q379 46 379 62Q380 64 380 200Q379 335 378 343Q372 371 358 385T334 402T308 404Q263 404 229 370Q202 343 195 315T187 232V168V108Q187 78 188 68T191 55T200 49Q221 46 249 46H265V0H257L234 1Q210 2 183 2T145 3Q42 3 33 0H25V46H41Z"></path><path stroke-width="0" id="E46-MJMATHI-3C0" d="M132 -11Q98 -11 98 22V33L111 61Q186 219 220 334L228 358H196Q158 358 142 355T103 336Q92 329 81 318T62 297T53 285Q51 284 38 284Q19 284 19 294Q19 300 38 329T93 391T164 429Q171 431 389 431Q549 431 553 430Q573 423 573 402Q573 371 541 360Q535 358 472 358H408L405 341Q393 269 393 222Q393 170 402 129T421 65T431 37Q431 20 417 5T381 -10Q370 -10 363 -7T347 17T331 77Q330 86 330 121Q330 170 339 226T357 318T367 358H269L268 354Q268 351 249 275T206 114T175 17Q164 -11 132 -11Z"></path><path stroke-width="0" id="E46-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E46-MJSSI-41" d="M28 0L429 694H533L585 350Q596 275 610 182T632 46L638 3V0H530L528 18Q527 25 515 103T503 183H223L135 29L118 1L73 0H28ZM492 254Q492 256 473 398T454 589V610Q433 552 290 301L264 255L378 254H492Z"></path><path stroke-width="0" id="E46-MJMAIN-2223" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path><path stroke-width="0" id="E46-MJSSI-53" d="M161 478Q161 568 242 642T435 716Q527 716 599 673L609 667Q595 633 589 615L571 568Q570 568 564 575T546 592T518 611T475 628T417 635Q351 635 305 596T259 507Q259 465 290 444T372 411T432 396Q473 385 509 343T545 236Q545 140 464 59T270 -22Q155 -22 54 48L92 146Q93 146 101 138T124 117T161 92T216 72T288 63Q360 63 403 109T447 204Q447 220 444 233T435 256T421 273T404 285T385 295T366 301T347 306T331 310T315 314T292 321T265 331T235 346T207 367T183 395T168 431T161 478Z"></path><path stroke-width="0" id="E46-MJMAIN-3B" d="M78 370Q78 394 95 412T138 430Q162 430 180 414T199 371Q199 346 182 328T139 310T96 327T78 370ZM78 60Q78 85 94 103T137 121Q202 121 202 8Q202 -44 183 -94T144 -169T118 -194Q115 -194 106 -186T95 -174Q94 -171 107 -155T137 -107T160 -38Q161 -32 162 -22T165 -4T165 4Q165 5 161 4T142 0Q110 0 94 18T78 60Z"></path><path stroke-width="0" id="E46-MJMAINB-77" d="M624 444Q636 441 722 441Q797 441 800 444H805V382H741L593 11Q592 10 590 8T586 4T584 2T581 0T579 -2T575 -3T571 -3T567 -4T561 -4T553 -4H542Q525 -4 518 6T490 70Q474 110 463 137L415 257L367 137Q357 111 341 72Q320 17 313 7T289 -4H277Q259 -4 253 -2T238 11L90 382H25V444H32Q47 441 140 441Q243 441 261 444H270V382H222L310 164L382 342L366 382H303V444H310Q322 441 407 441Q508 441 523 444H531V382H506Q481 382 481 380Q482 376 529 259T577 142L674 382H617V444H624Z"></path><path stroke-width="0" id="E46-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E46-MJMAIN-2207" x="0" y="0"></use><g transform="translate(999,0)"><use xlink:href="#E46-MJMAIN-6C"></use><use xlink:href="#E46-MJMAIN-6E" x="278" y="0"></use></g><use xlink:href="#E46-MJMATHI-3C0" x="2000" y="0"></use><g transform="translate(2739,0)"><use xlink:href="#E46-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E46-MJSSI-41" x="389" y="0"></use><use xlink:href="#E46-MJMAIN-2223" x="1333" y="0"></use><use xlink:href="#E46-MJSSI-53" x="1889" y="0"></use><use xlink:href="#E46-MJMAIN-3B" x="2498" y="0"></use><use xlink:href="#E46-MJMAINB-77" x="2943" y="0"></use><use xlink:href="#E46-MJMAIN-29" x="3774" y="0"></use></g></g></svg></span><script type="math/tex">\nabla\ln\pi\left(\sfA\mid\sfS;\mathbf{w}\right)</script></p><p><span>改为</span></p><p><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="15.309ex" height="2.577ex" viewBox="0 -806.1 6591.5 1109.7" role="img" focusable="false" style="vertical-align: -0.705ex;"><defs><path stroke-width="0" id="E47-MJMAIN-2207" d="M46 676Q46 679 51 683H781Q786 679 786 676Q786 674 617 326T444 -26Q439 -33 416 -33T388 -26Q385 -22 216 326T46 676ZM697 596Q697 597 445 597T193 596Q195 591 319 336T445 80L697 596Z"></path><path stroke-width="0" id="E47-MJMAIN-6C" d="M42 46H56Q95 46 103 60V68Q103 77 103 91T103 124T104 167T104 217T104 272T104 329Q104 366 104 407T104 482T104 542T103 586T103 603Q100 622 89 628T44 637H26V660Q26 683 28 683L38 684Q48 685 67 686T104 688Q121 689 141 690T171 693T182 694H185V379Q185 62 186 60Q190 52 198 49Q219 46 247 46H263V0H255L232 1Q209 2 183 2T145 3T107 3T57 1L34 0H26V46H42Z"></path><path stroke-width="0" id="E47-MJMAIN-6E" d="M41 46H55Q94 46 102 60V68Q102 77 102 91T102 122T103 161T103 203Q103 234 103 269T102 328V351Q99 370 88 376T43 385H25V408Q25 431 27 431L37 432Q47 433 65 434T102 436Q119 437 138 438T167 441T178 442H181V402Q181 364 182 364T187 369T199 384T218 402T247 421T285 437Q305 442 336 442Q450 438 463 329Q464 322 464 190V104Q464 66 466 59T477 49Q498 46 526 46H542V0H534L510 1Q487 2 460 2T422 3Q319 3 310 0H302V46H318Q379 46 379 62Q380 64 380 200Q379 335 378 343Q372 371 358 385T334 402T308 404Q263 404 229 370Q202 343 195 315T187 232V168V108Q187 78 188 68T191 55T200 49Q221 46 249 46H265V0H257L234 1Q210 2 183 2T145 3Q42 3 33 0H25V46H41Z"></path><path stroke-width="0" id="E47-MJMATHI-3C0" d="M132 -11Q98 -11 98 22V33L111 61Q186 219 220 334L228 358H196Q158 358 142 355T103 336Q92 329 81 318T62 297T53 285Q51 284 38 284Q19 284 19 294Q19 300 38 329T93 391T164 429Q171 431 389 431Q549 431 553 430Q573 423 573 402Q573 371 541 360Q535 358 472 358H408L405 341Q393 269 393 222Q393 170 402 129T421 65T431 37Q431 20 417 5T381 -10Q370 -10 363 -7T347 17T331 77Q330 86 330 121Q330 170 339 226T357 318T367 358H269L268 354Q268 351 249 275T206 114T175 17Q164 -11 132 -11Z"></path><path stroke-width="0" id="E47-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E47-MJSSI-41" d="M28 0L429 694H533L585 350Q596 275 610 182T632 46L638 3V0H530L528 18Q527 25 515 103T503 183H223L135 29L118 1L73 0H28ZM492 254Q492 256 473 398T454 589V610Q433 552 290 301L264 255L378 254H492Z"></path><path stroke-width="0" id="E47-MJMAIN-2223" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path><path stroke-width="0" id="E47-MJSSI-53" d="M161 478Q161 568 242 642T435 716Q527 716 599 673L609 667Q595 633 589 615L571 568Q570 568 564 575T546 592T518 611T475 628T417 635Q351 635 305 596T259 507Q259 465 290 444T372 411T432 396Q473 385 509 343T545 236Q545 140 464 59T270 -22Q155 -22 54 48L92 146Q93 146 101 138T124 117T161 92T216 72T288 63Q360 63 403 109T447 204Q447 220 444 233T435 256T421 273T404 285T385 295T366 301T347 306T331 310T315 314T292 321T265 331T235 346T207 367T183 395T168 431T161 478Z"></path><path stroke-width="0" id="E47-MJMAIN-3B" d="M78 370Q78 394 95 412T138 430Q162 430 180 414T199 371Q199 346 182 328T139 310T96 327T78 370ZM78 60Q78 85 94 103T137 121Q202 121 202 8Q202 -44 183 -94T144 -169T118 -194Q115 -194 106 -186T95 -174Q94 -171 107 -155T137 -107T160 -38Q161 -32 162 -22T165 -4T165 4Q165 5 161 4T142 0Q110 0 94 18T78 60Z"></path><path stroke-width="0" id="E47-MJMATHI-3B8" d="M35 200Q35 302 74 415T180 610T319 704Q320 704 327 704T339 705Q393 701 423 656Q462 596 462 495Q462 380 417 261T302 66T168 -10H161Q125 -10 99 10T60 63T41 130T35 200ZM383 566Q383 668 330 668Q294 668 260 623T204 521T170 421T157 371Q206 370 254 370L351 371Q352 372 359 404T375 484T383 566ZM113 132Q113 26 166 26Q181 26 198 36T239 74T287 161T335 307L340 324H145Q145 321 136 286T120 208T113 132Z"></path><path stroke-width="0" id="E47-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E47-MJMAIN-2207" x="0" y="0"></use><g transform="translate(999,0)"><use xlink:href="#E47-MJMAIN-6C"></use><use xlink:href="#E47-MJMAIN-6E" x="278" y="0"></use></g><use xlink:href="#E47-MJMATHI-3C0" x="2000" y="0"></use><g transform="translate(2739,0)"><use xlink:href="#E47-MJMAIN-28" x="0" y="0"></use><use xlink:href="#E47-MJSSI-41" x="389" y="0"></use><use xlink:href="#E47-MJMAIN-2223" x="1333" y="0"></use><use xlink:href="#E47-MJSSI-53" x="1889" y="0"></use><use xlink:href="#E47-MJMAIN-3B" x="2498" y="0"></use><g transform="translate(2943,0)"><g><use xlink:href="#E47-MJMATHI-3B8" x="0" y="0"></use></g></g><use xlink:href="#E47-MJMATHI-3B8" x="2993" y="0"></use><use xlink:href="#E47-MJMAIN-29" x="3462" y="0"></use></g></g></svg></span><script type="math/tex">\nabla\ln\pi\left(\sfA\mid\sfS;\bftheta\right)</script></p><h2 id='第144页第15行和第18行共2处）'><span>第144页第15行和第18行（共2处）</span></h2><p><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="22.392ex" height="2.928ex" viewBox="0 -806.1 9640.9 1260.5" role="img" focusable="false" style="vertical-align: -1.055ex;"><defs><path stroke-width="0" id="E48-MJMAIN-45" d="M128 619Q121 626 117 628T101 631T58 634H25V680H597V676Q599 670 611 560T625 444V440H585V444Q584 447 582 465Q578 500 570 526T553 571T528 601T498 619T457 629T411 633T353 634Q266 634 251 633T233 622Q233 622 233 621Q232 619 232 497V376H286Q359 378 377 385Q413 401 416 469Q416 471 416 473V493H456V213H416V233Q415 268 408 288T383 317T349 328T297 330Q290 330 286 330H232V196V114Q232 57 237 52Q243 47 289 47H340H391Q428 47 452 50T505 62T552 92T584 146Q594 172 599 200T607 247T612 270V273H652V270Q651 267 632 137T610 3V0H25V46H58Q100 47 109 49T128 61V619Z"></path><path stroke-width="0" id="E48-MJSSI-53" d="M161 478Q161 568 242 642T435 716Q527 716 599 673L609 667Q595 633 589 615L571 568Q570 568 564 575T546 592T518 611T475 628T417 635Q351 635 305 596T259 507Q259 465 290 444T372 411T432 396Q473 385 509 343T545 236Q545 140 464 59T270 -22Q155 -22 54 48L92 146Q93 146 101 138T124 117T161 92T216 72T288 63Q360 63 403 109T447 204Q447 220 444 233T435 256T421 273T404 285T385 295T366 301T347 306T331 310T315 314T292 321T265 331T235 346T207 367T183 395T168 431T161 478Z"></path><path stroke-width="0" id="E48-MJMATHI-74" d="M26 385Q19 392 19 395Q19 399 22 411T27 425Q29 430 36 430T87 431H140L159 511Q162 522 166 540T173 566T179 586T187 603T197 615T211 624T229 626Q247 625 254 615T261 596Q261 589 252 549T232 470L222 433Q222 431 272 431H323Q330 424 330 420Q330 398 317 385H210L174 240Q135 80 135 68Q135 26 162 26Q197 26 230 60T283 144Q285 150 288 151T303 153H307Q322 153 322 145Q322 142 319 133Q314 117 301 95T267 48T216 6T155 -11Q125 -11 98 4T59 56Q57 64 57 83V101L92 241Q127 382 128 383Q128 385 77 385H26Z"></path><path stroke-width="0" id="E48-MJMAIN-2C" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path stroke-width="0" id="E48-MJSSI-41" d="M28 0L429 694H533L585 350Q596 275 610 182T632 46L638 3V0H530L528 18Q527 25 515 103T503 183H223L135 29L118 1L73 0H28ZM492 254Q492 256 473 398T454 589V610Q433 552 290 301L264 255L378 254H492Z"></path><path stroke-width="0" id="E48-MJMAIN-223C" d="M55 166Q55 241 101 304T222 367Q260 367 296 349T362 304T421 252T484 208T554 189Q616 189 655 236T694 338Q694 350 698 358T708 367Q722 367 722 334Q722 260 677 197T562 134H554Q517 134 481 152T414 196T355 248T292 293T223 311Q179 311 145 286Q109 257 96 218T80 156T69 133Q55 133 55 166Z"></path><path stroke-width="0" id="E48-MJMATHI-3C0" d="M132 -11Q98 -11 98 22V33L111 61Q186 219 220 334L228 358H196Q158 358 142 355T103 336Q92 329 81 318T62 297T53 285Q51 284 38 284Q19 284 19 294Q19 300 38 329T93 391T164 429Q171 431 389 431Q549 431 553 430Q573 423 573 402Q573 371 541 360Q535 358 472 358H408L405 341Q393 269 393 222Q393 170 402 129T421 65T431 37Q431 20 417 5T381 -10Q370 -10 363 -7T347 17T331 77Q330 86 330 121Q330 170 339 226T357 318T367 358H269L268 354Q268 351 249 275T206 114T175 17Q164 -11 132 -11Z"></path><path stroke-width="0" id="E48-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E48-MJMATHI-3B8" d="M35 200Q35 302 74 415T180 610T319 704Q320 704 327 704T339 705Q393 701 423 656Q462 596 462 495Q462 380 417 261T302 66T168 -10H161Q125 -10 99 10T60 63T41 130T35 200ZM383 566Q383 668 330 668Q294 668 260 623T204 521T170 421T157 371Q206 370 254 370L351 371Q352 372 359 404T375 484T383 566ZM113 132Q113 26 166 26Q181 26 198 36T239 74T287 161T335 307L340 324H145Q145 321 136 286T120 208T113 132Z"></path><path stroke-width="0" id="E48-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path stroke-width="0" id="E48-MJMAIN-5B" d="M118 -250V750H255V710H158V-210H255V-250H118Z"></path><path stroke-width="0" id="E48-MJMATHI-61" d="M33 157Q33 258 109 349T280 441Q331 441 370 392Q386 422 416 422Q429 422 439 414T449 394Q449 381 412 234T374 68Q374 43 381 35T402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487Q506 153 506 144Q506 138 501 117T481 63T449 13Q436 0 417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157ZM351 328Q351 334 346 350T323 385T277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q217 26 254 59T298 110Q300 114 325 217T351 328Z"></path><path stroke-width="0" id="E48-MJMATHI-6B" d="M121 647Q121 657 125 670T137 683Q138 683 209 688T282 694Q294 694 294 686Q294 679 244 477Q194 279 194 272Q213 282 223 291Q247 309 292 354T362 415Q402 442 438 442Q468 442 485 423T503 369Q503 344 496 327T477 302T456 291T438 288Q418 288 406 299T394 328Q394 353 410 369T442 390L458 393Q446 405 434 405H430Q398 402 367 380T294 316T228 255Q230 254 243 252T267 246T293 238T320 224T342 206T359 180T365 147Q365 130 360 106T354 66Q354 26 381 26Q429 26 459 145Q461 153 479 153H483Q499 153 499 144Q499 139 496 130Q455 -11 378 -11Q333 -11 305 15T277 90Q277 108 280 121T283 145Q283 167 269 183T234 206T200 217T182 220H180Q168 178 159 139T145 81T136 44T129 20T122 7T111 -2Q98 -11 83 -11Q66 -11 57 -1T48 16Q48 26 85 176T158 471L195 616Q196 629 188 632T149 637H144Q134 637 131 637T124 640T121 647Z"></path><path stroke-width="0" id="E48-MJMAIN-5D" d="M22 710V750H159V-250H22V-210H119V710H22Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E48-MJMAIN-45" x="0" y="0"></use><g transform="translate(681,-186)"><use transform="scale(0.707)" xlink:href="#E48-MJSSI-53" x="0" y="0"></use><use transform="scale(0.5)" xlink:href="#E48-MJMATHI-74" x="786" y="-213"></use><use transform="scale(0.707)" xlink:href="#E48-MJMAIN-2C" x="911" y="0"></use><g transform="translate(840,0)"><use transform="scale(0.707)" xlink:href="#E48-MJSSI-41" x="0" y="0"></use><use transform="scale(0.5)" xlink:href="#E48-MJMATHI-74" x="943" y="-213"></use></g><use transform="scale(0.707)" xlink:href="#E48-MJMAIN-223C" x="2211" y="0"></use><use transform="scale(0.707)" xlink:href="#E48-MJMATHI-3C0" x="2989" y="0"></use><g transform="translate(2519,0)"><use transform="scale(0.707)" xlink:href="#E48-MJMAIN-28" x="0" y="0"></use><g transform="translate(275,0)"><g><use transform="scale(0.707)" xlink:href="#E48-MJMATHI-3B8" x="0" y="0"></use></g></g><use transform="scale(0.707)" xlink:href="#E48-MJMATHI-3B8" x="460" y="0"></use><use transform="scale(0.707)" xlink:href="#E48-MJMAIN-29" x="929" y="0"></use></g></g><g transform="translate(4398,0)"><use xlink:href="#E48-MJMAIN-5B"></use><g transform="translate(278,0)"><use xlink:href="#E48-MJMATHI-61" x="0" y="0"></use><g transform="translate(529,-150)"><use transform="scale(0.707)" xlink:href="#E48-MJMATHI-3C0" x="0" y="0"></use><use transform="scale(0.5)" xlink:href="#E48-MJMATHI-6B" x="806" y="-213"></use></g></g><g transform="translate(1807,0)"><use xlink:href="#E48-MJMAIN-28" x="0" y="0"></use><g transform="translate(389,0)"><use xlink:href="#E48-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E48-MJMATHI-74" x="786" y="-213"></use></g><use xlink:href="#E48-MJMAIN-2C" x="1300" y="0"></use><g transform="translate(1744,0)"><use xlink:href="#E48-MJSSI-41" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E48-MJMATHI-74" x="943" y="-213"></use></g><use xlink:href="#E48-MJMAIN-29" x="2767" y="0"></use></g><use xlink:href="#E48-MJMAIN-5D" x="4964" y="0"></use></g></g></svg></span><script type="math/tex">\E_{\sfS_t,\sfA_t\sim\pi\left(\bftheta\right)}\left[a_{\pi_k}\left(\sfS_t,\sfA_t\right)\right]</script></p><h4 id='改为-7'><span>改为</span></h4><p><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="25.134ex" height="4.445ex" viewBox="0 -1208.2 10821.7 1913.9" role="img" focusable="false" style="vertical-align: -1.639ex;"><defs><path stroke-width="0" id="E49-MJMAIN-45" d="M128 619Q121 626 117 628T101 631T58 634H25V680H597V676Q599 670 611 560T625 444V440H585V444Q584 447 582 465Q578 500 570 526T553 571T528 601T498 619T457 629T411 633T353 634Q266 634 251 633T233 622Q233 622 233 621Q232 619 232 497V376H286Q359 378 377 385Q413 401 416 469Q416 471 416 473V493H456V213H416V233Q415 268 408 288T383 317T349 328T297 330Q290 330 286 330H232V196V114Q232 57 237 52Q243 47 289 47H340H391Q428 47 452 50T505 62T552 92T584 146Q594 172 599 200T607 247T612 270V273H652V270Q651 267 632 137T610 3V0H25V46H58Q100 47 109 49T128 61V619Z"></path><path stroke-width="0" id="E49-MJSSI-53" d="M161 478Q161 568 242 642T435 716Q527 716 599 673L609 667Q595 633 589 615L571 568Q570 568 564 575T546 592T518 611T475 628T417 635Q351 635 305 596T259 507Q259 465 290 444T372 411T432 396Q473 385 509 343T545 236Q545 140 464 59T270 -22Q155 -22 54 48L92 146Q93 146 101 138T124 117T161 92T216 72T288 63Q360 63 403 109T447 204Q447 220 444 233T435 256T421 273T404 285T385 295T366 301T347 306T331 310T315 314T292 321T265 331T235 346T207 367T183 395T168 431T161 478Z"></path><path stroke-width="0" id="E49-MJMATHI-74" d="M26 385Q19 392 19 395Q19 399 22 411T27 425Q29 430 36 430T87 431H140L159 511Q162 522 166 540T173 566T179 586T187 603T197 615T211 624T229 626Q247 625 254 615T261 596Q261 589 252 549T232 470L222 433Q222 431 272 431H323Q330 424 330 420Q330 398 317 385H210L174 240Q135 80 135 68Q135 26 162 26Q197 26 230 60T283 144Q285 150 288 151T303 153H307Q322 153 322 145Q322 142 319 133Q314 117 301 95T267 48T216 6T155 -11Q125 -11 98 4T59 56Q57 64 57 83V101L92 241Q127 382 128 383Q128 385 77 385H26Z"></path><path stroke-width="0" id="E49-MJMAIN-2C" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path stroke-width="0" id="E49-MJSSI-41" d="M28 0L429 694H533L585 350Q596 275 610 182T632 46L638 3V0H530L528 18Q527 25 515 103T503 183H223L135 29L118 1L73 0H28ZM492 254Q492 256 473 398T454 589V610Q433 552 290 301L264 255L378 254H492Z"></path><path stroke-width="0" id="E49-MJMAIN-223C" d="M55 166Q55 241 101 304T222 367Q260 367 296 349T362 304T421 252T484 208T554 189Q616 189 655 236T694 338Q694 350 698 358T708 367Q722 367 722 334Q722 260 677 197T562 134H554Q517 134 481 152T414 196T355 248T292 293T223 311Q179 311 145 286Q109 257 96 218T80 156T69 133Q55 133 55 166Z"></path><path stroke-width="0" id="E49-MJMATHI-3C0" d="M132 -11Q98 -11 98 22V33L111 61Q186 219 220 334L228 358H196Q158 358 142 355T103 336Q92 329 81 318T62 297T53 285Q51 284 38 284Q19 284 19 294Q19 300 38 329T93 391T164 429Q171 431 389 431Q549 431 553 430Q573 423 573 402Q573 371 541 360Q535 358 472 358H408L405 341Q393 269 393 222Q393 170 402 129T421 65T431 37Q431 20 417 5T381 -10Q370 -10 363 -7T347 17T331 77Q330 86 330 121Q330 170 339 226T357 318T367 358H269L268 354Q268 351 249 275T206 114T175 17Q164 -11 132 -11Z"></path><path stroke-width="0" id="E49-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E49-MJMATHI-3B8" d="M35 200Q35 302 74 415T180 610T319 704Q320 704 327 704T339 705Q393 701 423 656Q462 596 462 495Q462 380 417 261T302 66T168 -10H161Q125 -10 99 10T60 63T41 130T35 200ZM383 566Q383 668 330 668Q294 668 260 623T204 521T170 421T157 371Q206 370 254 370L351 371Q352 372 359 404T375 484T383 566ZM113 132Q113 26 166 26Q181 26 198 36T239 74T287 161T335 307L340 324H145Q145 321 136 286T120 208T113 132Z"></path><path stroke-width="0" id="E49-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path stroke-width="0" id="E49-MJMAIN-5B" d="M118 -250V750H255V710H158V-210H255V-250H118Z"></path><path stroke-width="0" id="E49-MJMATHI-61" d="M33 157Q33 258 109 349T280 441Q331 441 370 392Q386 422 416 422Q429 422 439 414T449 394Q449 381 412 234T374 68Q374 43 381 35T402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487Q506 153 506 144Q506 138 501 117T481 63T449 13Q436 0 417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157ZM351 328Q351 334 346 350T323 385T277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q217 26 254 59T298 110Q300 114 325 217T351 328Z"></path><path stroke-width="0" id="E49-MJMATHI-6B" d="M121 647Q121 657 125 670T137 683Q138 683 209 688T282 694Q294 694 294 686Q294 679 244 477Q194 279 194 272Q213 282 223 291Q247 309 292 354T362 415Q402 442 438 442Q468 442 485 423T503 369Q503 344 496 327T477 302T456 291T438 288Q418 288 406 299T394 328Q394 353 410 369T442 390L458 393Q446 405 434 405H430Q398 402 367 380T294 316T228 255Q230 254 243 252T267 246T293 238T320 224T342 206T359 180T365 147Q365 130 360 106T354 66Q354 26 381 26Q429 26 459 145Q461 153 479 153H483Q499 153 499 144Q499 139 496 130Q455 -11 378 -11Q333 -11 305 15T277 90Q277 108 280 121T283 145Q283 167 269 183T234 206T200 217T182 220H180Q168 178 159 139T145 81T136 44T129 20T122 7T111 -2Q98 -11 83 -11Q66 -11 57 -1T48 16Q48 26 85 176T158 471L195 616Q196 629 188 632T149 637H144Q134 637 131 637T124 640T121 647Z"></path><path stroke-width="0" id="E49-MJSZ1-28" d="M152 251Q152 646 388 850H416Q422 844 422 841Q422 837 403 816T357 753T302 649T255 482T236 250Q236 124 255 19T301 -147T356 -251T403 -315T422 -340Q422 -343 416 -349H388Q359 -325 332 -296T271 -213T212 -97T170 56T152 251Z"></path><path stroke-width="0" id="E49-MJSZ1-29" d="M305 251Q305 -145 69 -349H56Q43 -349 39 -347T35 -338Q37 -333 60 -307T108 -239T160 -136T204 27T221 250T204 473T160 636T108 740T60 807T35 839Q35 850 50 850H56H69Q197 743 256 566Q305 425 305 251Z"></path><path stroke-width="0" id="E49-MJMAIN-5D" d="M22 710V750H159V-250H22V-210H119V710H22Z"></path><path stroke-width="0" id="E49-MJSZ2-5B" d="M224 -649V1150H455V1099H275V-598H455V-649H224Z"></path><path stroke-width="0" id="E49-MJSZ2-5D" d="M16 1099V1150H247V-649H16V-598H196V1099H16Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E49-MJMAIN-45" x="0" y="0"></use><g transform="translate(681,-186)"><use transform="scale(0.707)" xlink:href="#E49-MJSSI-53" x="0" y="0"></use><use transform="scale(0.5)" xlink:href="#E49-MJMATHI-74" x="786" y="-213"></use><use transform="scale(0.707)" xlink:href="#E49-MJMAIN-2C" x="911" y="0"></use><g transform="translate(840,0)"><use transform="scale(0.707)" xlink:href="#E49-MJSSI-41" x="0" y="0"></use><use transform="scale(0.5)" xlink:href="#E49-MJMATHI-74" x="943" y="-213"></use></g><use transform="scale(0.707)" xlink:href="#E49-MJMAIN-223C" x="2211" y="0"></use><use transform="scale(0.707)" xlink:href="#E49-MJMATHI-3C0" x="2989" y="0"></use><g transform="translate(2519,0)"><use transform="scale(0.707)" xlink:href="#E49-MJMAIN-28" x="0" y="0"></use><g transform="translate(275,0)"><g><use transform="scale(0.707)" xlink:href="#E49-MJMATHI-3B8" x="0" y="0"></use></g></g><use transform="scale(0.707)" xlink:href="#E49-MJMATHI-3B8" x="460" y="0"></use><use transform="scale(0.707)" xlink:href="#E49-MJMAIN-29" x="929" y="0"></use></g></g><g transform="translate(4398,0)"><use xlink:href="#E49-MJSZ2-5B"></use><g transform="translate(472,0)"><use xlink:href="#E49-MJMATHI-61" x="0" y="0"></use><g transform="translate(529,-150)"><use transform="scale(0.707)" xlink:href="#E49-MJMATHI-3C0" x="0" y="0"></use><g transform="translate(403,-182)"><use transform="scale(0.5)" xlink:href="#E49-MJSZ1-28"></use><g transform="translate(229,0)"><g><use transform="scale(0.5)" xlink:href="#E49-MJMATHI-3B8" x="0" y="0"></use></g></g><g transform="translate(279,0)"><use transform="scale(0.5)" xlink:href="#E49-MJMATHI-3B8" x="0" y="0"></use><use transform="scale(0.5)" xlink:href="#E49-MJMATHI-6B" x="469" y="-350"></use></g><use transform="scale(0.5)" xlink:href="#E49-MJSZ1-29" x="1648" y="-1"></use></g></g></g><g transform="translate(2794,0)"><use xlink:href="#E49-MJMAIN-28" x="0" y="0"></use><g transform="translate(389,0)"><use xlink:href="#E49-MJSSI-53" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E49-MJMATHI-74" x="786" y="-213"></use></g><use xlink:href="#E49-MJMAIN-2C" x="1300" y="0"></use><g transform="translate(1744,0)"><use xlink:href="#E49-MJSSI-41" x="0" y="0"></use><use transform="scale(0.707)" xlink:href="#E49-MJMATHI-74" x="943" y="-213"></use></g><use xlink:href="#E49-MJMAIN-29" x="2767" y="0"></use></g><use xlink:href="#E49-MJSZ2-5D" x="5950" y="-1"></use></g></g></svg></span><script type="math/tex">\E_{\sfS_t,\sfA_t\sim\pi\left(\bftheta\right)}\left[a_{\pi_{\left(\bftheta_k\right)}}\left(\sfS_t,\sfA_t\right)\right]</script></p><h3 id='第161页第45行'><span>第161页第4~5行</span></h3><p><span>作者注：“动作价值网络和策略网络往往采用矢量形式的输出”指的是动作空间是离散动作空间的情况。如果动作空间是连续动作空间，往往用Gaussian形式的策略，详见GitHub代码。包括SAC算法在内的使用策略梯度的算法采用Gaussian形式策略后，策略被限制为了单峰（unimodal）形式。如果需要策略具有多峰（multimodal）形式，可以使用混合Gaussian模型（Gaussian Mixture Model，GMM）。</span></p><h2 id='第161页86节'><span>第161页8.6节</span></h2><p><span>作者注：这个案例的动作空间是离散的。同时，本章介绍的算法，包括但不限于PPO、SAC等算法，也可用于动作空间是连续空间的情况。</span></p><h2 id='第177页算法9-2第231步'><span>第177页算法9-2第2.3.1步</span></h2><p><span>2.3.1（执行）用</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="6.8ex" height="2.577ex" viewBox="0 -806.1 2927.6 1109.7" role="img" focusable="false" style="vertical-align: -0.705ex;"><defs><path stroke-width="0" id="E50-MJMATHI-62" d="M73 647Q73 657 77 670T89 683Q90 683 161 688T234 694Q246 694 246 685T212 542Q204 508 195 472T180 418L176 399Q176 396 182 402Q231 442 283 442Q345 442 383 396T422 280Q422 169 343 79T173 -11Q123 -11 82 27T40 150V159Q40 180 48 217T97 414Q147 611 147 623T109 637Q104 637 101 637H96Q86 637 83 637T76 640T73 647ZM336 325V331Q336 405 275 405Q258 405 240 397T207 376T181 352T163 330L157 322L136 236Q114 150 114 114Q114 66 138 42Q154 26 178 26Q211 26 245 58Q270 81 285 114T318 219Q336 291 336 325Z"></path><path stroke-width="0" id="E50-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E50-MJMAIN-22C5" d="M78 250Q78 274 95 292T138 310Q162 310 180 294T199 251Q199 226 182 208T139 190T96 207T78 250Z"></path><path stroke-width="0" id="E50-MJMAIN-2223" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path><path stroke-width="0" id="E50-MJSSI-53" d="M161 478Q161 568 242 642T435 716Q527 716 599 673L609 667Q595 633 589 615L571 568Q570 568 564 575T546 592T518 611T475 628T417 635Q351 635 305 596T259 507Q259 465 290 444T372 411T432 396Q473 385 509 343T545 236Q545 140 464 59T270 -22Q155 -22 54 48L92 146Q93 146 101 138T124 117T161 92T216 72T288 63Q360 63 403 109T447 204Q447 220 444 233T435 256T421 273T404 285T385 295T366 301T347 306T331 310T315 314T292 321T265 331T235 346T207 367T183 395T168 431T161 478Z"></path><path stroke-width="0" id="E50-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E50-MJMATHI-62" x="0" y="0"></use><use xlink:href="#E50-MJMAIN-28" x="429" y="0"></use><use xlink:href="#E50-MJMAIN-22C5" x="818" y="0"></use><use xlink:href="#E50-MJMAIN-2223" x="1373" y="0"></use><use xlink:href="#E50-MJSSI-53" x="1929" y="0"></use><use xlink:href="#E50-MJMAIN-29" x="2538" y="0"></use></g></svg></span><script type="math/tex">b(\cdot\mid\sfS)</script><span>得到</span></p><h3 id='改为-8'><span>改为</span></h3><p><span>2.3.1（执行）用行为策略</span><span class="MathJax_SVG" tabindex="-1" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" width="4.218ex" height="2.577ex" viewBox="0 -806.1 1816 1109.7" role="img" focusable="false" style="vertical-align: -0.705ex;"><defs><path stroke-width="0" id="E51-MJMATHI-62" d="M73 647Q73 657 77 670T89 683Q90 683 161 688T234 694Q246 694 246 685T212 542Q204 508 195 472T180 418L176 399Q176 396 182 402Q231 442 283 442Q345 442 383 396T422 280Q422 169 343 79T173 -11Q123 -11 82 27T40 150V159Q40 180 48 217T97 414Q147 611 147 623T109 637Q104 637 101 637H96Q86 637 83 637T76 640T73 647ZM336 325V331Q336 405 275 405Q258 405 240 397T207 376T181 352T163 330L157 322L136 236Q114 150 114 114Q114 66 138 42Q154 26 178 26Q211 26 245 58Q270 81 285 114T318 219Q336 291 336 325Z"></path><path stroke-width="0" id="E51-MJMAIN-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path stroke-width="0" id="E51-MJSSI-53" d="M161 478Q161 568 242 642T435 716Q527 716 599 673L609 667Q595 633 589 615L571 568Q570 568 564 575T546 592T518 611T475 628T417 635Q351 635 305 596T259 507Q259 465 290 444T372 411T432 396Q473 385 509 343T545 236Q545 140 464 59T270 -22Q155 -22 54 48L92 146Q93 146 101 138T124 117T161 92T216 72T288 63Q360 63 403 109T447 204Q447 220 444 233T435 256T421 273T404 285T385 295T366 301T347 306T331 310T315 314T292 321T265 331T235 346T207 367T183 395T168 431T161 478Z"></path><path stroke-width="0" id="E51-MJMAIN-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xlink:href="#E51-MJMATHI-62" x="0" y="0"></use><use xlink:href="#E51-MJMAIN-28" x="429" y="0"></use><use xlink:href="#E51-MJSSI-53" x="818" y="0"></use><use xlink:href="#E51-MJMAIN-29" x="1427" y="0"></use></g></svg></span><script type="math/tex">b(\sfS)</script><span>得到</span></p></div></div>
</body>
</html>